2 жил өмнө · c61d1a066c
--- a/Makefile
+++ b/Makefile
@@ -220,7 +220,7 @@ qemu-%.tar.bz2:
 
				 
			
 
				 distclean: clean recurse-distclean
			
 
				 	-$(quiet-@)test -f build.ninja && $(NINJA) $(NINJAFLAGS) -t clean -g || :
			
 
				-	rm -f config-host.mak Makefile.prereqs qemu-bundle
			
 
				+	rm -f config-host.mak Makefile.prereqs
			
 
				 	rm -f tests/tcg/*/config-target.mak tests/tcg/config-host.mak
			
 
				 	rm -f config.status
			
 
				 	rm -f roms/seabios/config.mak
			
@@ -230,7 +230,7 @@ distclean: clean recurse-distclean
 
				 	rm -f Makefile.ninja Makefile.mtest build.ninja.stamp meson.stamp
			
 
				 	rm -f config.log
			
 
				 	rm -f linux-headers/asm
			
 
				-	rm -Rf .sdk
			
 
				+	rm -Rf .sdk qemu-bundle
			
 
				 
			
 
				 find-src-path = find "$(SRC_PATH)" -path "$(SRC_PATH)/meson" -prune -o \
			
 
				 	-type l -prune -o \( -name "*.[chsS]" -o -name "*.[ch].inc" \)
			
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3703,6 +3703,9 @@ static void kvm_accel_instance_init(Object *obj)
 
				     s->kvm_dirty_ring_size = 0;
			
 
				     s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN;
			
 
				     s->notify_window = 0;
			
 
				+    s->xen_version = 0;
			
 
				+    s->xen_gnttab_max_frames = 64;
			
 
				+    s->xen_evtchn_max_pirq = 256;
			
 
				 }
			
 
				 
			
 
				 /**
			
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -171,6 +171,8 @@ static int xen_init(MachineState *ms)
 
				      * opt out of system RAM being allocated by generic code
			
 
				      */
			
 
				     mc->default_ram_id = NULL;
			
 
				+
			
 
				+    xen_mode = XEN_ATTACH;
			
 
				     return 0;
			
 
				 }
			
 
				 
			
--- a/docs/system/i386/xen.rst
+++ b/docs/system/i386/xen.rst
@@ -0,0 +1,76 @@
 
				+Xen HVM guest support
			
 
				+=====================
			
 
				+
			
 
				+
			
 
				+Description
			
 
				+-----------
			
 
				+
			
 
				+KVM has support for hosting Xen guests, intercepting Xen hypercalls and event
			
 
				+channel (Xen PV interrupt) delivery. This allows guests which expect to be
			
 
				+run under Xen to be hosted in QEMU under Linux/KVM instead.
			
 
				+
			
 
				+Setup
			
 
				+-----
			
 
				+
			
 
				+Xen mode is enabled by setting the ``xen-version`` property of the KVM
			
 
				+accelerator, for example for Xen 4.10:
			
 
				+
			
 
				+.. parsed-literal::
			
 
				+
			
 
				+  |qemu_system| --accel kvm,xen-version=0x4000a
			
 
				+
			
 
				+Additionally, virtual APIC support can be advertised to the guest through the
			
 
				+``xen-vapic`` CPU flag:
			
 
				+
			
 
				+.. parsed-literal::
			
 
				+
			
 
				+  |qemu_system| --accel kvm,xen-version=0x4000a --cpu host,+xen-vapic
			
 
				+
			
 
				+When Xen support is enabled, QEMU changes hypervisor identification (CPUID
			
 
				+0x40000000..0x4000000A) to Xen. The KVM identification and features are not
			
 
				+advertised to a Xen guest. If Hyper-V is also enabled, the Xen identification
			
 
				+moves to leaves 0x40000100..0x4000010A.
			
 
				+
			
 
				+The Xen platform device is enabled automatically for a Xen guest. This allows
			
 
				+a guest to unplug all emulated devices, in order to use Xen PV block and network
			
 
				+drivers instead. Note that until the Xen PV device back ends are enabled to work
			
 
				+with Xen mode in QEMU, that is unlikely to cause significant joy. Linux guests
			
 
				+can be dissuaded from this by adding 'xen_emul_unplug=never' on their command
			
 
				+line, and it can also be noted that AHCI disk controllers are exempt from being
			
 
				+unplugged, as are passthrough VFIO PCI devices.
			
 
				+
			
 
				+Properties
			
 
				+----------
			
 
				+
			
 
				+The following properties exist on the KVM accelerator object:
			
 
				+
			
 
				+``xen-version``
			
 
				+  This property contains the Xen version in ``XENVER_version`` form, with the
			
 
				+  major version in the top 16 bits and the minor version in the low 16 bits.
			
 
				+  Setting this property enables the Xen guest support.
			
 
				+
			
 
				+``xen-evtchn-max-pirq``
			
 
				+  Xen PIRQs represent an emulated physical interrupt, either GSI or MSI, which
			
 
				+  can be routed to an event channel instead of to the emulated I/O or local
			
 
				+  APIC. By default, QEMU permits only 256 PIRQs because this allows maximum
			
 
				+  compatibility with 32-bit MSI where the higher bits of the PIRQ# would need
			
 
				+  to be in the upper 32 bits of the 64-bit MSI address. For guests with large numbers
			
 
				+  of PCI devices (and none which are limited to 32-bit addressing) it may be
			
 
				+  desirable to increase this value.
			
 
				+
			
 
				+``xen-gnttab-max-frames``
			
 
				+  Xen grant tables are the means by which a Xen guest grants access to its
			
 
				+  memory for PV back ends (disk, network, etc.). Since QEMU only supports v1
			
 
				+  grant tables, whose entries are 8 bytes in size, each page (each frame) of the grant
			
 
				+  table can reference 512 pages of guest memory. The default number of frames
			
 
				+  is 64, allowing for 32768 pages of guest memory to be accessed by PV backends
			
 
				+  through simultaneous grants. For guests with large numbers of PV devices and
			
 
				+  high throughput, it may be desirable to increase this value.
			
 
				+
			
 
				+OS requirements
			
 
				+---------------
			
 
				+
			
 
				+The minimal Xen support in the KVM accelerator requires the host to be running
			
 
				+Linux v5.12 or newer. Later versions add optimisations: Linux v5.17 added
			
 
				+acceleration of interrupt delivery via the Xen PIRQ mechanism, and Linux v5.19
			
 
				+accelerated Xen PV timers and inter-processor interrupts (IPIs).
			
--- a/docs/system/target-i386.rst
+++ b/docs/system/target-i386.rst
@@ -27,6 +27,7 @@ Architectural features
 
				 
			
 
				    i386/cpu
			
 
				    i386/hyperv
			
 
				+   i386/xen
			
 
				    i386/kvm-pv
			
 
				    i386/sgx
			
 
				    i386/amd-memory-encryption
			
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1815,3 +1815,32 @@ SRST
 
				   Dump the FDT in dtb format to *filename*.
			
 
				 ERST
			
 
				 #endif
			
 
				+
			
 
				+#if defined(CONFIG_XEN_EMU)
			
 
				+    {
			
 
				+        .name       = "xen-event-inject",
			
 
				+        .args_type  = "port:i",
			
 
				+        .params     = "port",
			
 
				+        .help       = "inject event channel",
			
 
				+        .cmd        = hmp_xen_event_inject,
			
 
				+    },
			
 
				+
			
 
				+SRST
			
 
				+``xen-event-inject`` *port*
			
 
				+  Notify guest via event channel on port *port*.
			
 
				+ERST
			
 
				+
			
 
				+
			
 
				+    {
			
 
				+        .name       = "xen-event-list",
			
 
				+        .args_type  = "",
			
 
				+        .params     = "",
			
 
				+        .help       = "list event channel state",
			
 
				+        .cmd        = hmp_xen_event_list,
			
 
				+    },
			
 
				+
			
 
				+SRST
			
 
				+``xen-event-list``
			
 
				+  List event channels in the guest
			
 
				+ERST
			
 
				+#endif
			
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -41,6 +41,7 @@ source tpm/Kconfig
 
				 source usb/Kconfig
			
 
				 source virtio/Kconfig
			
 
				 source vfio/Kconfig
			
 
				+source xen/Kconfig
			
 
				 source watchdog/Kconfig
			
 
				 
			
 
				 # arch Kconfig
			
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -102,6 +102,7 @@ MachineInfoList *qmp_query_machines(Error **errp)
 
				         info->hotpluggable_cpus = mc->has_hotpluggable_cpus;
			
 
				         info->numa_mem_supported = mc->numa_mem_supported;
			
 
				         info->deprecated = !!mc->deprecation_reason;
			
 
				+        info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi");
			
 
				         if (mc->default_cpu_type) {
			
 
				             info->default_cpu_type = g_strdup(mc->default_cpu_type);
			
 
				         }
			
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -136,3 +136,8 @@ config VMPORT
 
				 config VMMOUSE
			
 
				     bool
			
 
				     depends on VMPORT
			
 
				+
			
 
				+config XEN_EMU
			
 
				+    bool
			
 
				+    default y
			
 
				+    depends on KVM && (I386 || X86_64)
			
--- a/hw/i386/kvm/meson.build
+++ b/hw/i386/kvm/meson.build
@@ -4,5 +4,18 @@ i386_kvm_ss.add(when: 'CONFIG_APIC', if_true: files('apic.c'))
 
				 i386_kvm_ss.add(when: 'CONFIG_I8254', if_true: files('i8254.c'))
			
 
				 i386_kvm_ss.add(when: 'CONFIG_I8259', if_true: files('i8259.c'))
			
 
				 i386_kvm_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic.c'))
			
 
				+i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files(
			
 
				+  'xen_overlay.c',
			
 
				+  'xen_evtchn.c',
			
 
				+  'xen_gnttab.c',
			
 
				+  'xen_xenstore.c',
			
 
				+  ))
			
 
				 
			
 
				 i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
			
 
				+
			
 
				+xen_stubs_ss = ss.source_set()
			
 
				+xen_stubs_ss.add(when: 'CONFIG_XEN_EMU', if_false: files(
			
 
				+  'xen-stubs.c',
			
 
				+))
			
 
				+
			
 
				+specific_ss.add_all(when: 'CONFIG_SOFTMMU', if_true: xen_stubs_ss)
			
--- a/hw/i386/kvm/trace-events
+++ b/hw/i386/kvm/trace-events
@@ -0,0 +1,5 @@
 
				+kvm_xen_map_pirq(int pirq, int gsi) "pirq %d gsi %d"
			
 
				+kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d"
			
 
				+kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d"
			
 
				+kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d"
			
 
				+kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d"
			
--- a/hw/i386/kvm/trace.h
+++ b/hw/i386/kvm/trace.h
@@ -0,0 +1 @@
 
				+#include "trace/trace-hw_i386_kvm.h"
			
--- a/hw/i386/kvm/xen-stubs.c
+++ b/hw/i386/kvm/xen-stubs.c
@@ -0,0 +1,44 @@
 
				+/*
			
 
				+ * QEMU Xen emulation: QMP stubs
			
 
				+ *
			
 
				+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * Authors: David Woodhouse <dwmw2@infradead.org>
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ */
			
 
				+
			
 
				+#include "qemu/osdep.h"
			
 
				+
			
 
				+#include "qapi/error.h"
			
 
				+#include "qapi/qapi-commands-misc-target.h"
			
 
				+
			
 
				+#include "xen_evtchn.h"
			
 
				+
			
 
				+/*
			
 
				+ * Stub built when CONFIG_XEN_EMU is disabled: there is no event channel
			
 
				+ * emulation to tell about MSI configuration, so this does nothing.
			
 
				+ */
			
 
				+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
			
 
				+                          uint64_t addr, uint32_t data, bool is_masked)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+/* Stub built when CONFIG_XEN_EMU is disabled: nothing to clean up. */
			
 
				+void xen_evtchn_remove_pci_device(PCIDevice *dev)
			
 
				+{
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Stub built when CONFIG_XEN_EMU is disabled: always returns false.
			
 
				+ * NOTE(review): presumably false means "not consumed as a Xen PIRQ, deliver
			
 
				+ * the MSI normally" -- confirm against the callers.
			
 
				+ */
			
 
				+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
			
 
				+{
			
 
				+    return false;
			
 
				+}
			
 
				+
			
 
				+#ifdef TARGET_I386
			
 
				+/*
			
 
				+ * Stub built when CONFIG_XEN_EMU is disabled: always fails, setting @errp
			
 
				+ * and returning NULL.
			
 
				+ */
			
 
				+EvtchnInfoList *qmp_xen_event_list(Error **errp)
			
 
				+{
			
 
				+    error_setg(errp, "Xen event channel emulation not enabled");
			
 
				+    return NULL;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Stub built when CONFIG_XEN_EMU is disabled: always fails by setting @errp;
			
 
				+ * @port is ignored.
			
 
				+ */
			
 
				+void qmp_xen_event_inject(uint32_t port, Error **errp)
			
 
				+{
			
 
				+    error_setg(errp, "Xen event channel emulation not enabled");
			
 
				+}
			
 
				+#endif
			
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -0,0 +1,2341 @@
 
				+/*
			
 
				+ * QEMU Xen emulation: Event channel support
			
 
				+ *
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * Authors: David Woodhouse <dwmw2@infradead.org>
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ */
			
 
				+
			
 
				+#include "qemu/osdep.h"
			
 
				+#include "qemu/host-utils.h"
			
 
				+#include "qemu/module.h"
			
 
				+#include "qemu/lockable.h"
			
 
				+#include "qemu/main-loop.h"
			
 
				+#include "qemu/log.h"
			
 
				+#include "monitor/monitor.h"
			
 
				+#include "monitor/hmp.h"
			
 
				+#include "qapi/error.h"
			
 
				+#include "qapi/qapi-commands-misc-target.h"
			
 
				+#include "qapi/qmp/qdict.h"
			
 
				+#include "qom/object.h"
			
 
				+#include "exec/target_page.h"
			
 
				+#include "exec/address-spaces.h"
			
 
				+#include "migration/vmstate.h"
			
 
				+#include "trace.h"
			
 
				+
			
 
				+#include "hw/sysbus.h"
			
 
				+#include "hw/xen/xen.h"
			
 
				+#include "hw/i386/x86.h"
			
 
				+#include "hw/i386/pc.h"
			
 
				+#include "hw/pci/pci.h"
			
 
				+#include "hw/pci/msi.h"
			
 
				+#include "hw/pci/msix.h"
			
 
				+#include "hw/irq.h"
			
 
				+
			
 
				+#include "xen_evtchn.h"
			
 
				+#include "xen_overlay.h"
			
 
				+#include "xen_xenstore.h"
			
 
				+
			
 
				+#include "sysemu/kvm.h"
			
 
				+#include "sysemu/kvm_xen.h"
			
 
				+#include <linux/kvm.h>
			
 
				+#include <sys/eventfd.h>
			
 
				+
			
 
				+#include "hw/xen/interface/memory.h"
			
 
				+#include "hw/xen/interface/hvm/params.h"
			
 
				+
			
 
				+/* XX: For kvm_update_msi_routes_all() */
			
 
				+#include "target/i386/kvm/kvm_i386.h"
			
 
				+
			
 
				+#define TYPE_XEN_EVTCHN "xen-evtchn"
			
 
				+OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
			
 
				+
			
 
				+typedef struct XenEvtchnPort {
			
 
				+    uint32_t vcpu;      /* Xen/ACPI vcpu_id */
			
 
				+    uint16_t type;      /* EVTCHNSTAT_xxxx */
			
 
				+    uint16_t type_val;  /* pirq# / virq# / remote port according to type */
			
 
				+} XenEvtchnPort;
			
 
				+
			
 
				+/* 32-bit compatibility definitions, also used natively in 32-bit build */
			
 
				+struct compat_arch_vcpu_info {
			
 
				+    unsigned int cr2;
			
 
				+    unsigned int pad[5];
			
 
				+};
			
 
				+
			
 
				+struct compat_vcpu_info {
			
 
				+    uint8_t evtchn_upcall_pending;
			
 
				+    uint8_t evtchn_upcall_mask;
			
 
				+    uint16_t pad;
			
 
				+    uint32_t evtchn_pending_sel;
			
 
				+    struct compat_arch_vcpu_info arch;
			
 
				+    struct vcpu_time_info time;
			
 
				+}; /* 64 bytes (x86) */
			
 
				+
			
 
				+struct compat_arch_shared_info {
			
 
				+    unsigned int max_pfn;
			
 
				+    unsigned int pfn_to_mfn_frame_list_list;
			
 
				+    unsigned int nmi_reason;
			
 
				+    unsigned int p2m_cr3;
			
 
				+    unsigned int p2m_vaddr;
			
 
				+    unsigned int p2m_generation;
			
 
				+    uint32_t wc_sec_hi;
			
 
				+};
			
 
				+
			
 
				+struct compat_shared_info {
			
 
				+    struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
			
 
				+    uint32_t evtchn_pending[32];
			
 
				+    uint32_t evtchn_mask[32];
			
 
				+    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
			
 
				+    uint32_t wc_sec;
			
 
				+    uint32_t wc_nsec;
			
 
				+    struct compat_arch_shared_info arch;
			
 
				+};
			
 
				+
			
 
				+#define COMPAT_EVTCHN_2L_NR_CHANNELS            1024
			
 
				+
			
 
				+/* Local private implementation of struct xenevtchn_handle */
			
 
				+struct xenevtchn_handle {
			
 
				+    evtchn_port_t be_port;
			
 
				+    evtchn_port_t guest_port; /* Or zero for unbound */
			
 
				+    int fd;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * For unbound/interdomain ports there are only two possible remote
			
 
				+ * domains; self and QEMU. Use a single high bit in type_val for that,
			
 
				+ * and the low bits for the remote port number (or 0 for unbound).
			
 
				+ */
			
 
				+#define PORT_INFO_TYPEVAL_REMOTE_QEMU           0x8000
			
 
				+#define PORT_INFO_TYPEVAL_REMOTE_PORT_MASK      0x7FFF
			
 
				+
			
 
				+/*
			
 
				+ * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
			
 
				+ * insane enough to think about guest-transparent live migration from actual
			
 
				+ * Xen to QEMU, and ensuring that we can convert/consume the stream.
			
 
				+ */
			
 
				+#define IRQ_UNBOUND -1
			
 
				+#define IRQ_PT -2
			
 
				+#define IRQ_MSI_EMU -3
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * Per-PIRQ bookkeeping, one entry per PIRQ# in XenEvtchnState.pirq[].
			
 
				+ * The table is not migrated; .gsi and .port are rebuilt in
			
 
				+ * xen_evtchn_post_load().
			
 
				+ */
			
 
				+struct pirq_info {
			
 
				+    int gsi;            /* GSI#, or one of the negative IRQ_xxx values */
			
 
				+    uint16_t port;      /* index into port_table[] of the bound port */
			
 
				+    PCIDevice *dev;     /* NOTE(review): presumably the MSI source device */
			
 
				+    int vector;
			
 
				+    bool is_msix;
			
 
				+    bool is_masked;
			
 
				+    bool is_translated;
			
 
				+};
			
 
				+
			
 
				+struct XenEvtchnState {
			
 
				+    /*< private >*/
			
 
				+    SysBusDevice busdev;
			
 
				+    /*< public >*/
			
 
				+
			
 
				+    uint64_t callback_param;
			
 
				+    bool evtchn_in_kernel;
			
 
				+    uint32_t callback_gsi;
			
 
				+
			
 
				+    QEMUBH *gsi_bh;
			
 
				+
			
 
				+    QemuMutex port_lock;
			
 
				+    uint32_t nr_ports;
			
 
				+    XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];
			
 
				+    qemu_irq gsis[IOAPIC_NUM_PINS];
			
 
				+
			
 
				+    struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];
			
 
				+
			
 
				+    uint32_t nr_pirqs;
			
 
				+
			
 
				+    /* Bitmap of allocated PIRQs (serialized) */
			
 
				+    uint16_t nr_pirq_inuse_words;
			
 
				+    uint64_t *pirq_inuse_bitmap;
			
 
				+
			
 
				+    /* GSI → PIRQ mapping (serialized) */
			
 
				+    uint16_t gsi_pirq[IOAPIC_NUM_PINS];
			
 
				+
			
 
				+    /* Per-GSI assertion state (serialized) */
			
 
				+    uint32_t pirq_gsi_set;
			
 
				+
			
 
				+    /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
			
 
				+    struct pirq_info *pirq;
			
 
				+};
			
 
				+
			
 
				+#define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
			
 
				+#define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))
			
 
				+
			
 
				+#define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
			
 
				+
			
 
				+struct XenEvtchnState *xen_evtchn_singleton;
			
 
				+
			
 
				+/* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
			
 
				+#define CALLBACK_VIA_TYPE_SHIFT 56
			
 
				+
			
 
				+static void unbind_backend_ports(XenEvtchnState *s);
			
 
				+
			
 
				+/*
			
 
				+ * Migration pre-load hook: drop state that the incoming stream will replace.
			
 
				+ * Always returns 0.
			
 
				+ */
			
 
				+static int xen_evtchn_pre_load(void *opaque)
			
 
				+{
			
 
				+    XenEvtchnState *evtchn = opaque;
			
 
				+
			
 
				+    /* Backend-side ports do not survive a load; they have to rebind. */
			
 
				+    unbind_backend_ports(evtchn);
			
 
				+
			
 
				+    /* Release the old bitmap and clear the pointer so it is not leaked. */
			
 
				+    g_clear_pointer(&evtchn->pirq_inuse_bitmap, g_free);
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Migration post-load hook: re-apply the loaded callback parameter and
			
 
				+ * rebuild the per-PIRQ table (s->pirq[]) from the loaded port table and
			
 
				+ * GSI -> PIRQ map.
			
 
				+ *
			
 
				+ * Returns 0 on success, or -EINVAL if the loaded state names a PIRQ# that
			
 
				+ * is zero (for a PIRQ port) or not below s->nr_pirqs.
			
 
				+ */
			
 
				+static int xen_evtchn_post_load(void *opaque, int version_id)
			
 
				+{
			
 
				+    XenEvtchnState *s = opaque;
			
 
				+    uint32_t i;
			
 
				+
			
 
				+    if (s->callback_param) {
			
 
				+        xen_evtchn_set_callback_param(s->callback_param);
			
 
				+    }
			
 
				+
			
 
				+    /* Rebuild s->pirq[].port mapping */
			
 
				+    for (i = 0; i < s->nr_ports; i++) {
			
 
				+        XenEvtchnPort *p = &s->port_table[i];
			
 
				+
			
 
				+        if (p->type == EVTCHNSTAT_pirq) {
			
 
				+            /*
			
 
				+             * type_val was read from the migration stream: fail the load
			
 
				+             * on a bad PIRQ# instead of aborting or indexing past the end
			
 
				+             * of s->pirq[].
			
 
				+             */
			
 
				+            if (!p->type_val || p->type_val >= s->nr_pirqs) {
			
 
				+                return -EINVAL;
			
 
				+            }
			
 
				+
			
 
				+            /*
			
 
				+             * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
			
 
				+             * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
			
 
				+             * catches up with it.
			
 
				+             */
			
 
				+            s->pirq[p->type_val].gsi = IRQ_UNBOUND;
			
 
				+            s->pirq[p->type_val].port = i;
			
 
				+        }
			
 
				+    }
			
 
				+    /* Rebuild s->pirq[].gsi mapping */
			
 
				+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
			
 
				+        uint16_t pirq = s->gsi_pirq[i];
			
 
				+
			
 
				+        if (!pirq) {
			
 
				+            continue;
			
 
				+        }
			
 
				+
			
 
				+        /* gsi_pirq[] is stream data too: bounds-check before indexing. */
			
 
				+        if (pirq >= s->nr_pirqs) {
			
 
				+            return -EINVAL;
			
 
				+        }
			
 
				+        s->pirq[pirq].gsi = i;
			
 
				+    }
			
 
				+    return 0;
			
 
				+}
			
 
				+
			
 
				+/* VMState .needed hook: only migrate this device when emulating Xen. */
			
 
				+static bool xen_evtchn_is_needed(void *opaque)
			
 
				+{
			
 
				+    return xen_mode == XEN_EMULATE;
			
 
				+}
			
 
				+
			
 
				+static const VMStateDescription xen_evtchn_port_vmstate = {
			
 
				+    .name = "xen_evtchn_port",
			
 
				+    .version_id = 1,
			
 
				+    .minimum_version_id = 1,
			
 
				+    .fields = (VMStateField[]) {
			
 
				+        VMSTATE_UINT32(vcpu, XenEvtchnPort),
			
 
				+        VMSTATE_UINT16(type, XenEvtchnPort),
			
 
				+        VMSTATE_UINT16(type_val, XenEvtchnPort),
			
 
				+        VMSTATE_END_OF_LIST()
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Migration description for XenEvtchnState. Only the fields listed here
			
 
				+ * are transferred; s->pirq[] is rebuilt by xen_evtchn_post_load().
			
 
				+ */
			
 
				+static const VMStateDescription xen_evtchn_vmstate = {
			
 
				+    .name = "xen_evtchn",
			
 
				+    .version_id = 1,
			
 
				+    .minimum_version_id = 1,
			
 
				+    .needed = xen_evtchn_is_needed,
			
 
				+    .pre_load = xen_evtchn_pre_load,
			
 
				+    .post_load = xen_evtchn_post_load,
			
 
				+    .fields = (VMStateField[]) {
			
 
				+        VMSTATE_UINT64(callback_param, XenEvtchnState),
			
 
				+        VMSTATE_UINT32(nr_ports, XenEvtchnState),
			
 
				+        /* The first nr_ports entries of port_table[] */
			
 
				+        VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
			
 
				+                                     xen_evtchn_port_vmstate, XenEvtchnPort),
			
 
				+        VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
			
 
				+        /* Sized by nr_pirq_inuse_words, which is not itself in the stream */
			
 
				+        VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
			
 
				+                                    nr_pirq_inuse_words, 0,
			
 
				+                                    vmstate_info_uint64, uint64_t),
			
 
				+        VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
			
 
				+        VMSTATE_END_OF_LIST()
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+/* QOM class initialiser: attach the migration description to the device. */
			
 
				+static void xen_evtchn_class_init(ObjectClass *klass, void *data)
			
 
				+{
			
 
				+    DeviceClass *dc = DEVICE_CLASS(klass);
			
 
				+
			
 
				+    dc->vmsd = &xen_evtchn_vmstate;
			
 
				+}
			
 
				+
			
 
				+static const TypeInfo xen_evtchn_info = {
			
 
				+    .name          = TYPE_XEN_EVTCHN,
			
 
				+    .parent        = TYPE_SYS_BUS_DEVICE,
			
 
				+    .instance_size = sizeof(XenEvtchnState),
			
 
				+    .class_init    = xen_evtchn_class_init,
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Bottom half scheduled by xen_evtchn_set_callback_level(): set the callback
			
 
				+ * level from vCPU 0's evtchn_upcall_pending flag, if its vcpu_info is
			
 
				+ * available.
			
 
				+ */
			
 
				+static void gsi_assert_bh(void *opaque)
			
 
				+{
			
 
				+    struct vcpu_info *info = kvm_xen_get_vcpu_info_hva(0);
			
 
				+
			
 
				+    if (info == NULL) {
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    xen_evtchn_set_callback_level(info->evtchn_upcall_pending ? 1 : 0);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Create the event channel device, record it as the singleton, and set up
			
 
				+ * its lock, bottom half, GSI output lines and PIRQ tables.
			
 
				+ */
			
 
				+void xen_evtchn_create(void)
			
 
				+{
			
 
				+    XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
			
 
				+                                                        -1, NULL));
			
 
				+    int i;
			
 
				+
			
 
				+    xen_evtchn_singleton = s;
			
 
				+
			
 
				+    qemu_mutex_init(&s->port_lock);
			
 
				+    s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);
			
 
				+
			
 
				+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
			
 
				+        sysbus_init_irq(SYS_BUS_DEVICE(s), &s->gsis[i]);
			
 
				+    }
			
 
				+
			
 
				+    /*
			
 
				+     * The Xen scheme for encoding PIRQ# into an MSI message is not
			
 
				+     * compatible with 32-bit MSI, as it puts the high bits of the
			
 
				+     * PIRQ# into the high bits of the MSI message address, instead of
			
 
				+     * using the Extended Destination ID in address bits 4-11 which
			
 
				+     * perhaps would have been a better choice.
			
 
				+     *
			
 
				+     * To keep life simple, kvm_accel_instance_init() initialises the
			
 
				+     * default to 256, which conveniently doesn't need to set anything
			
 
				+     * outside the low 32 bits of the address. It can be increased by
			
 
				+     * setting the xen-evtchn-max-pirq property.
			
 
				+     */
			
 
				+    s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();
			
 
				+
			
 
				+    /* One bit per PIRQ, rounded up to whole 64-bit words */
			
 
				+    s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
			
 
				+    s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
			
 
				+    s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Wire each of the device's IOAPIC_NUM_PINS GSI outputs to the matching
			
 
				+ * entry of @system_gsis. Does nothing if the device was never created.
			
 
				+ */
			
 
				+void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
			
 
				+{
			
 
				+    XenEvtchnState *s = xen_evtchn_singleton;
			
 
				+    SysBusDevice *sbd;
			
 
				+    int pin;
			
 
				+
			
 
				+    if (s == NULL) {
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    sbd = SYS_BUS_DEVICE(s);
			
 
				+    for (pin = 0; pin < IOAPIC_NUM_PINS; pin++) {
			
 
				+        sysbus_connect_irq(sbd, pin, system_gsis[pin]);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+/* Register the xen-evtchn QOM type; run via type_init() below. */
			
 
				+static void xen_evtchn_register_types(void)
			
 
				+{
			
 
				+    type_register_static(&xen_evtchn_info);
			
 
				+}
			
 
				+
			
 
				+type_init(xen_evtchn_register_types)
			
 
				+
			
 
				+/*
			
 
				+ * Resolve a PCI INTx callback parameter to the IRQ it is currently routed
			
 
				+ * to.
			
 
				+ *
			
 
				+ * @param is decoded as: bits 0-1 INTx pin, bits 8-15 devfn, bits 16-31 bus,
			
 
				+ * bits 32-47 PCI domain.
			
 
				+ *
			
 
				+ * Returns the routed IRQ, or 0 if the domain is non-zero, the device is not
			
 
				+ * found, or INTx routing is not enabled for it.
			
 
				+ */
			
 
				+static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
			
 
				+{
			
 
				+    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
			
 
				+    uint8_t pin = param & 3;
			
 
				+    uint8_t devfn = (param >> 8) & 0xff;
			
 
				+    uint16_t bus = (param >> 16) & 0xffff;
			
 
				+    uint16_t domain = (param >> 32) & 0xffff;
			
 
				+    PCIDevice *pdev;
			
 
				+    PCIINTxRoute r;
			
 
				+
			
 
				+    /* Only PCI domain 0 is looked up */
			
 
				+    if (domain || !pcms) {
			
 
				+        return 0;
			
 
				+    }
			
 
				+
			
 
				+    pdev = pci_find_device(pcms->bus, bus, devfn);
			
 
				+    if (!pdev) {
			
 
				+        return 0;
			
 
				+    }
			
 
				+
			
 
				+    r = pci_device_route_intx_to_irq(pdev, pin);
			
 
				+    if (r.mode != PCI_INTX_ENABLED) {
			
 
				+        return 0;
			
 
				+    }
			
 
				+
			
 
				+    /*
			
 
				+     * Hm, can we be notified of INTX routing changes? Not without
			
 
				+     * *owning* the device and being allowed to overwrite its own
			
 
				+     * ->intx_routing_notifier, AFAICT. So let's not.
			
 
				+     */
			
 
				+    return r.irq;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Assert (@level non-zero) or deassert (@level zero) the callback GSI, if
			
 
				+ * one is configured. Without the BQL held the work is deferred to the
			
 
				+ * gsi_bh bottom half instead of being done here.
			
 
				+ */
			
 
				+void xen_evtchn_set_callback_level(int level)
			
 
				+{
			
 
				+    XenEvtchnState *s = xen_evtchn_singleton;
			
 
				+    if (!s) {
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    /*
			
 
				+     * We get to this function in a number of ways:
			
 
				+     *
			
 
				+     *  • From I/O context, via PV backend drivers sending a notification to
			
 
				+     *    the guest.
			
 
				+     *
			
 
				+     *  • From guest vCPU context, via loopback interdomain event channels
			
 
				+     *    (or theoretically even IPIs but guests don't use those with GSI
			
 
				+     *    delivery because that's pointless. We don't want a malicious guest
			
 
				+     *    to be able to trigger a deadlock though, so we can't rule it out.)
			
 
				+     *
			
 
				+     *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
			
 
				+     *    configured.
			
 
				+     *
			
 
				+     *  • From guest vCPU context in the KVM exit handler, if the upcall
			
 
				+     *    pending flag has been cleared and the GSI needs to be deasserted.
			
 
				+     *
			
 
				+     *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
			
 
				+     *    been acked in the irqchip.
			
 
				+     *
			
 
				+     * Whichever context we come from, if we aren't already holding the BQL
			
 
				+     * then we can't take it now, as we may already hold s->port_lock. So
			
 
				+     * trigger the BH to set the IRQ for us instead of doing it immediately.
			
 
				+     *
			
 
				+     * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
			
 
				+     * will deliberately take the BQL because they want the change to take
			
 
				+     * effect immediately. That just leaves interdomain loopback as the case
			
 
				+     * which uses the BH.
			
 
				+     */
			
 
				+    if (!qemu_mutex_iothread_locked()) {
			
 
				+        qemu_bh_schedule(s->gsi_bh);
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    /* A callback_gsi of zero means no GSI callback is configured */
			
 
				+    if (s->callback_gsi && s->callback_gsi < IOAPIC_NUM_PINS) {
			
 
				+        qemu_set_irq(s->gsis[s->callback_gsi], level);
			
 
				+        if (level) {
			
 
				+            /* Ensure the vCPU polls for deassertion */
			
 
				+            kvm_xen_set_callback_asserted();
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Configure how event channel upcalls are delivered to the guest.
			
 
				+ *
			
 
				+ * The delivery type is taken from the bits of @param at and above
			
 
				+ * CALLBACK_VIA_TYPE_SHIFT (HVM_PARAM_CALLBACK_TYPE_xxx); the rest is
			
 
				+ * interpreted per type: a vector number, a PCI INTx description, or a GSI.
			
 
				+ * Unknown types are accepted without error, as Xen does.
			
 
				+ *
			
 
				+ * Must be called with the BQL held (asserted below).
			
 
				+ *
			
 
				+ * Returns 0 on success, -ENOTSUP if the event channel device does not exist,
			
 
				+ * -EINVAL if a PCI INTx parameter cannot be resolved to an IRQ, or the
			
 
				+ * error from the KVM_XEN_HVM_SET_ATTR ioctl for vector delivery.
			
 
				+ */
			
 
				+int xen_evtchn_set_callback_param(uint64_t param)
			
 
				+{
			
 
				+    XenEvtchnState *s = xen_evtchn_singleton;
			
 
				+    struct kvm_xen_hvm_attr xa = {
			
 
				+        .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
			
 
				+        .u.vector = 0,
			
 
				+    };
			
 
				+    bool in_kernel = false;
			
 
				+    uint32_t gsi = 0;
			
 
				+    int type = param >> CALLBACK_VIA_TYPE_SHIFT;
			
 
				+    int ret;
			
 
				+
			
 
				+    if (!s) {
			
 
				+        return -ENOTSUP;
			
 
				+    }
			
 
				+
			
 
				+    /*
			
 
				+     * We need the BQL because set_callback_pci_intx() may call into PCI code,
			
 
				+     * and because we may need to manipulate the old and new GSI levels.
			
 
				+     */
			
 
				+    assert(qemu_mutex_iothread_locked());
			
 
				+    qemu_mutex_lock(&s->port_lock);
			
 
				+
			
 
				+    switch (type) {
			
 
				+    case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
			
 
				+        xa.u.vector = (uint8_t)param;
			
 
				+
			
 
				+        ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
			
 
				+        if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
			
 
				+            in_kernel = true;
			
 
				+        }
			
 
				+        gsi = 0;
			
 
				+        break;
			
 
				+    }
			
 
				+
			
 
				+    case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
			
 
				+        gsi = set_callback_pci_intx(s, param);
			
 
				+        ret = gsi ? 0 : -EINVAL;
			
 
				+        break;
			
 
				+
			
 
				+    case HVM_PARAM_CALLBACK_TYPE_GSI:
			
 
				+        gsi = (uint32_t)param;
			
 
				+        ret = 0;
			
 
				+        break;
			
 
				+
			
 
				+    default:
			
 
				+        /* Xen doesn't return error even if you set something bogus */
			
 
				+        ret = 0;
			
 
				+        break;
			
 
				+    }
			
 
				+
			
 
				+    if (!ret) {
			
 
				+        /* If vector delivery was turned *off* then tell the kernel */
			
 
				+        if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
			
 
				+            HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
			
 
				+            kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
			
 
				+        }
			
 
				+        s->callback_param = param;
			
 
				+        s->evtchn_in_kernel = in_kernel;
			
 
				+
			
 
				+        if (gsi != s->callback_gsi) {
			
 
				+            struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
			
 
				+
			
 
				+            /* Deassert the old GSI before switching to the new one */
			
 
				+            xen_evtchn_set_callback_level(0);
			
 
				+            s->callback_gsi = gsi;
			
 
				+
			
 
				+            if (gsi && vi && vi->evtchn_upcall_pending) {
			
 
				+                kvm_xen_inject_vcpu_callback_vector(0, type);
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    qemu_mutex_unlock(&s->port_lock);
			
 
				+
			
 
				+    return ret;
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Inject the event channel callback into @vcpu, using the delivery type
			
 
				+ * held in the top bits of the stored callback parameter.
			
 
				+ */
			
 
				+static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
			
 
				+{
			
 
				+    kvm_xen_inject_vcpu_callback_vector(
			
 
				+        vcpu, (int)(s->callback_param >> CALLBACK_VIA_TYPE_SHIFT));
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Ask KVM to stop handling sends on @port in the kernel. A failure is only
			
 
				+ * logged; nothing is returned to the caller.
			
 
				+ */
			
 
				+static void deassign_kernel_port(evtchn_port_t port)
			
 
				+{
			
 
				+    struct kvm_xen_hvm_attr ha;
			
 
				+    int ret;
			
 
				+
			
 
				+    ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
			
 
				+    ha.u.evtchn.send_port = port;
			
 
				+    ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;
			
 
				+
			
 
				+    ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
			
 
				+    if (ret) {
			
 
				+        /* kvm_vm_ioctl() reports failure as -errno; strerror() wants errno */
			
 
				+        qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
			
 
				+                      port, strerror(-ret));
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+/*
				+ * Set a KVM_XEN_ATTR_TYPE_EVTCHN attribute so that sends on @port are
				+ * delivered to the same port number on the vCPU identified by @vcpu_id,
				+ * at 2-level priority.
				+ *
				+ * Returns -ENOENT if @vcpu_id names no CPU, otherwise the result of the
				+ * KVM_XEN_HVM_SET_ATTR ioctl.
				+ */
				+static int assign_kernel_port(uint16_t type, evtchn_port_t port,
				+                              uint32_t vcpu_id)
				+{
				+    struct kvm_xen_hvm_attr attr;
				+    CPUState *target = qemu_get_cpu(vcpu_id);
				+
				+    if (target == NULL) {
				+        return -ENOENT;
				+    }
				+
				+    attr.type = KVM_XEN_ATTR_TYPE_EVTCHN;
				+    attr.u.evtchn.type = type;
				+    attr.u.evtchn.flags = 0;
				+    attr.u.evtchn.send_port = port;
				+
				+    /* Deliver to the same port number that was sent on. */
				+    attr.u.evtchn.deliver.port.port = port;
				+    attr.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(target);
				+    attr.u.evtchn.deliver.port.priority =
				+        KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
				+
				+    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &attr);
				+}
			
 
				+
			
 
				+/*
				+ * Set a KVM_XEN_ATTR_TYPE_EVTCHN attribute for @port whose delivery target
				+ * is the eventfd @fd (with the delivery port field set to zero).
				+ *
				+ * Returns the result of the KVM_XEN_HVM_SET_ATTR ioctl.
				+ */
				+static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
				+{
				+    struct kvm_xen_hvm_attr attr;
				+
				+    attr.type = KVM_XEN_ATTR_TYPE_EVTCHN;
				+    attr.u.evtchn.type = type;
				+    attr.u.evtchn.flags = 0;
				+    attr.u.evtchn.send_port = port;
				+    attr.u.evtchn.deliver.eventfd.fd = fd;
				+    attr.u.evtchn.deliver.eventfd.port = 0;
				+
				+    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &attr);
				+}
			
 
				+
			
 
				+/*
				+ * A port number is usable if it is non-zero and below the 2-level channel
				+ * limit for the current mode: EVTCHN_2L_NR_CHANNELS in long mode,
				+ * COMPAT_EVTCHN_2L_NR_CHANNELS otherwise.
				+ */
				+static bool valid_port(evtchn_port_t port)
				+{
				+    if (port == 0) {
				+        return false;
				+    }
				+
				+    return xen_is_long_mode() ? port < EVTCHN_2L_NR_CHANNELS
				+                              : port < COMPAT_EVTCHN_2L_NR_CHANNELS;
				+}
			
 
				+
			
 
				+/* True if qemu_get_cpu() finds a CPU for index @vcpu. */
				+static bool valid_vcpu(uint32_t vcpu)
				+{
				+    return qemu_get_cpu(vcpu) != NULL;
				+}
			
 
				+
			
 
				+/*
				+ * Walk ports 1..nr_ports-1 and detach every interdomain port whose remote
				+ * end is QEMU and which has a backend handle: the guest port goes back to
				+ * "unbound, remote QEMU", and the handle's guest_port is cleared.
				+ */
				+static void unbind_backend_ports(XenEvtchnState *s)
				+{
				+    int port;
				+
				+    for (port = 1; port < s->nr_ports; port++) {
				+        XenEvtchnPort *info = &s->port_table[port];
				+        evtchn_port_t be_port;
				+
				+        if (info->type != EVTCHNSTAT_interdomain ||
				+            !(info->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU)) {
				+            continue;
				+        }
				+
				+        be_port = info->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
				+        if (!s->be_handles[be_port]) {
				+            continue;
				+        }
				+
				+        /* This part will be overwritten on the load anyway. */
				+        info->type = EVTCHNSTAT_unbound;
				+        info->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
				+
				+        /* Leave the backend port open and unbound too. */
				+        if (kvm_xen_has_cap(EVTCHN_SEND)) {
				+            deassign_kernel_port(port);
				+        }
				+        s->be_handles[be_port]->guest_port = 0;
				+    }
				+}
			
 
				+
			
 
				+/*
				+ * Fill in @status for the port it names.
				+ *
				+ * Returns -ENOTSUP if there is no event channel state, -ESRCH if
				+ * status->dom is neither DOMID_SELF nor xen_domid, -EINVAL for an invalid
				+ * port number, and 0 on success.
				+ */
				+int xen_evtchn_status_op(struct evtchn_status *status)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    XenEvtchnPort *info;
				+    bool remote_is_qemu;
				+
				+    if (s == NULL) {
				+        return -ENOTSUP;
				+    }
				+    if (status->dom != DOMID_SELF && status->dom != xen_domid) {
				+        return -ESRCH;
				+    }
				+    if (!valid_port(status->port)) {
				+        return -EINVAL;
				+    }
				+
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    info = &s->port_table[status->port];
				+    remote_is_qemu = !!(info->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU);
				+
				+    status->status = info->type;
				+    status->vcpu = info->vcpu;
				+
				+    /* Only these four types carry extra detail in the union. */
				+    if (info->type == EVTCHNSTAT_unbound) {
				+        status->u.unbound.dom = remote_is_qemu ? DOMID_QEMU : xen_domid;
				+    } else if (info->type == EVTCHNSTAT_interdomain) {
				+        status->u.interdomain.dom = remote_is_qemu ? DOMID_QEMU : xen_domid;
				+        status->u.interdomain.port = info->type_val &
				+            PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
				+    } else if (info->type == EVTCHNSTAT_pirq) {
				+        status->u.pirq = info->type_val;
				+    } else if (info->type == EVTCHNSTAT_virq) {
				+        status->u.virq = info->type_val;
				+    }
				+
				+    qemu_mutex_unlock(&s->port_lock);
				+    return 0;
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Never thought I'd hear myself say this, but C++ templates would be
			
 
				+ * kind of nice here.
			
 
				+ *
			
 
				+ * template<class T> static int do_unmask_port(T *shinfo, ...);
			
 
				+ */
			
 
				+/*
				+ * Unmask handling for the long-mode shared_info/vcpu_info layout.
				+ *
				+ * With @do_unmask true the port's mask bit is cleared; with it false the
				+ * mask bit is only sampled (used for affinity changes). If the port ends
				+ * up unmasked and pending, the selector and upcall-pending flags of
				+ * @vcpu_info are set and the callback is injected, stopping at the first
				+ * flag that was already set.
				+ *
				+ * Returns -EINVAL if the port's word index is out of range, else 0.
				+ */
				+static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
				+                             bool do_unmask, struct shared_info *shinfo,
				+                             struct vcpu_info *vcpu_info)
				+{
				+    const int word_bits = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
				+    const int word = port / word_bits;
				+    typeof(shinfo->evtchn_pending[0]) bit = 1UL << (port % word_bits);
				+    typeof(shinfo->evtchn_pending[0]) sel, old_mask;
				+    bool was_masked;
				+
				+    if (word >= word_bits) {
				+        return -EINVAL;
				+    }
				+
				+    if (do_unmask) {
				+        /* True unmask: atomically clear the mask bit. */
				+        old_mask = qatomic_fetch_and(&shinfo->evtchn_mask[word], ~bit);
				+    } else {
				+        /* Pseudo-unmask: OR with zero just reads the word atomically. */
				+        old_mask = qatomic_fetch_or(&shinfo->evtchn_mask[word], 0);
				+    }
				+    was_masked = !!(old_mask & bit);
				+
				+    /*
				+     * Nothing further to do if a true unmask found the port already
				+     * unmasked, or if a pseudo-unmask found it masked.
				+     */
				+    if (was_masked != do_unmask) {
				+        return 0;
				+    }
				+
				+    /* If the event was not pending, we're done. */
				+    if (!(qatomic_fetch_or(&shinfo->evtchn_pending[word], 0) & bit)) {
				+        return 0;
				+    }
				+
				+    /* Now on to the vcpu_info evtchn_pending_sel index... */
				+    sel = 1UL << word;
				+
				+    /* If a port in this word was already pending for this vCPU, all done. */
				+    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, sel) & sel) {
				+        return 0;
				+    }
				+
				+    /* Set evtchn_upcall_pending; only inject if it was previously clear. */
				+    if (!qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
				+        inject_callback(s, s->port_table[port].vcpu);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Unmask handling for the compat shared_info/vcpu_info layout.
				+ *
				+ * With @do_unmask true the port's mask bit is cleared; with it false the
				+ * mask bit is only sampled (used for affinity changes). If the port ends
				+ * up unmasked and pending, the selector and upcall-pending flags of
				+ * @vcpu_info are set and the callback is injected, stopping at the first
				+ * flag that was already set.
				+ *
				+ * Returns -EINVAL if the port's word index is out of range, else 0.
				+ */
				+static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
				+                                 bool do_unmask,
				+                                 struct compat_shared_info *shinfo,
				+                                 struct compat_vcpu_info *vcpu_info)
				+{
				+    const int word_bits = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
				+    const int word = port / word_bits;
				+    typeof(shinfo->evtchn_pending[0]) bit = 1UL << (port % word_bits);
				+    typeof(shinfo->evtchn_pending[0]) sel, old_mask;
				+    bool was_masked;
				+
				+    if (word >= word_bits) {
				+        return -EINVAL;
				+    }
				+
				+    if (do_unmask) {
				+        /* True unmask: atomically clear the mask bit. */
				+        old_mask = qatomic_fetch_and(&shinfo->evtchn_mask[word], ~bit);
				+    } else {
				+        /* Pseudo-unmask: OR with zero just reads the word atomically. */
				+        old_mask = qatomic_fetch_or(&shinfo->evtchn_mask[word], 0);
				+    }
				+    was_masked = !!(old_mask & bit);
				+
				+    /*
				+     * Nothing further to do if a true unmask found the port already
				+     * unmasked, or if a pseudo-unmask found it masked.
				+     */
				+    if (was_masked != do_unmask) {
				+        return 0;
				+    }
				+
				+    /* If the event was not pending, we're done. */
				+    if (!(qatomic_fetch_or(&shinfo->evtchn_pending[word], 0) & bit)) {
				+        return 0;
				+    }
				+
				+    /* Now on to the vcpu_info evtchn_pending_sel index... */
				+    sel = 1UL << word;
				+
				+    /* If a port in this word was already pending for this vCPU, all done. */
				+    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, sel) & sel) {
				+        return 0;
				+    }
				+
				+    /* Set evtchn_upcall_pending; only inject if it was previously clear. */
				+    if (!qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
				+        inject_callback(s, s->port_table[port].vcpu);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Look up the shared info page and the vcpu_info of the vCPU that @port
				+ * is bound to, then run the long-mode or compat unmask helper.
				+ *
				+ * Returns -EINVAL for a closed port or missing vcpu_info, -ENOTSUP when
				+ * there is no shared info pointer, otherwise the helper's result.
				+ */
				+static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
				+{
				+    XenEvtchnPort *info = &s->port_table[port];
				+    void *shared, *vi;
				+
				+    if (info->type == EVTCHNSTAT_closed) {
				+        return -EINVAL;
				+    }
				+
				+    shared = xen_overlay_get_shinfo_ptr();
				+    if (shared == NULL) {
				+        return -ENOTSUP;
				+    }
				+
				+    vi = kvm_xen_get_vcpu_info_hva(info->vcpu);
				+    if (vi == NULL) {
				+        return -EINVAL;
				+    }
				+
				+    if (!xen_is_long_mode()) {
				+        return do_unmask_port_compat(s, port, do_unmask, shared, vi);
				+    }
				+    return do_unmask_port_lm(s, port, do_unmask, shared, vi);
				+}
			
 
				+
			
 
				+/*
				+ * Set @port pending using the long-mode shared_info/vcpu_info layout.
				+ *
				+ * The pending bit is set first; processing stops (returning 0) if that
				+ * bit was already set, if the port is masked, or at the first vcpu_info
				+ * flag that was already set. Otherwise the callback is injected on the
				+ * vCPU recorded for the port.
				+ *
				+ * Returns -EINVAL if the port's word index is out of range, else 0.
				+ */
				+static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
				+                          struct shared_info *shinfo,
				+                          struct vcpu_info *vcpu_info)
				+{
				+    const int word_bits = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
				+    const int word = port / word_bits;
				+    typeof(shinfo->evtchn_pending[0]) bit = 1UL << (port % word_bits);
				+    typeof(shinfo->evtchn_pending[0]) sel;
				+
				+    if (word >= word_bits) {
				+        return -EINVAL;
				+    }
				+
				+    /* Update the pending bit itself. If it was already set, we're done. */
				+    if (qatomic_fetch_or(&shinfo->evtchn_pending[word], bit) & bit) {
				+        return 0;
				+    }
				+
				+    /* A masked port gets no further processing (OR with 0 is a read). */
				+    if (qatomic_fetch_or(&shinfo->evtchn_mask[word], 0) & bit) {
				+        return 0;
				+    }
				+
				+    /* Now on to the vcpu_info evtchn_pending_sel index... */
				+    sel = 1UL << word;
				+
				+    /* If a port in this word was already pending for this vCPU, all done. */
				+    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, sel) & sel) {
				+        return 0;
				+    }
				+
				+    /* Set evtchn_upcall_pending; only inject if it was previously clear. */
				+    if (!qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
				+        inject_callback(s, s->port_table[port].vcpu);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Set @port pending using the compat shared_info/vcpu_info layout.
				+ *
				+ * The pending bit is set first; processing stops (returning 0) if that
				+ * bit was already set, if the port is masked, or at the first vcpu_info
				+ * flag that was already set. Otherwise the callback is injected on the
				+ * vCPU recorded for the port.
				+ *
				+ * Returns -EINVAL if the port's word index is out of range, else 0.
				+ */
				+static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
				+                              struct compat_shared_info *shinfo,
				+                              struct compat_vcpu_info *vcpu_info)
				+{
				+    const int word_bits = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
				+    const int word = port / word_bits;
				+    typeof(shinfo->evtchn_pending[0]) bit = 1UL << (port % word_bits);
				+    typeof(shinfo->evtchn_pending[0]) sel;
				+
				+    if (word >= word_bits) {
				+        return -EINVAL;
				+    }
				+
				+    /* Update the pending bit itself. If it was already set, we're done. */
				+    if (qatomic_fetch_or(&shinfo->evtchn_pending[word], bit) & bit) {
				+        return 0;
				+    }
				+
				+    /* A masked port gets no further processing (OR with 0 is a read). */
				+    if (qatomic_fetch_or(&shinfo->evtchn_mask[word], 0) & bit) {
				+        return 0;
				+    }
				+
				+    /* Now on to the vcpu_info evtchn_pending_sel index... */
				+    sel = 1UL << word;
				+
				+    /* If a port in this word was already pending for this vCPU, all done. */
				+    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, sel) & sel) {
				+        return 0;
				+    }
				+
				+    /* Set evtchn_upcall_pending; only inject if it was previously clear. */
				+    if (!qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
				+        inject_callback(s, s->port_table[port].vcpu);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Raise an event on @port.
				+ *
				+ * When s->evtchn_in_kernel is set the event is handed to KVM with
				+ * KVM_XEN_HVM_EVTCHN_SEND (silently succeeding if the port's vCPU cannot
				+ * be found). Otherwise the shared info page is updated directly by the
				+ * long-mode or compat helper.
				+ *
				+ * Returns -EINVAL for a closed port or missing vcpu_info, -ENOTSUP when
				+ * there is no shared info pointer, otherwise the ioctl/helper result.
				+ */
				+static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
				+{
				+    XenEvtchnPort *info = &s->port_table[port];
				+    void *shared, *vi;
				+
				+    if (info->type == EVTCHNSTAT_closed) {
				+        return -EINVAL;
				+    }
				+
				+    if (s->evtchn_in_kernel) {
				+        struct kvm_irq_routing_xen_evtchn evt;
				+        CPUState *target = qemu_get_cpu(info->vcpu);
				+
				+        if (target == NULL) {
				+            return 0;
				+        }
				+
				+        evt.port = port;
				+        evt.vcpu = kvm_arch_vcpu_id(target);
				+        evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
				+
				+        return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
				+    }
				+
				+    shared = xen_overlay_get_shinfo_ptr();
				+    if (shared == NULL) {
				+        return -ENOTSUP;
				+    }
				+
				+    vi = kvm_xen_get_vcpu_info_hva(info->vcpu);
				+    if (vi == NULL) {
				+        return -EINVAL;
				+    }
				+
				+    if (!xen_is_long_mode()) {
				+        return do_set_port_compat(s, port, shared, vi);
				+    }
				+    return do_set_port_lm(s, port, shared, vi);
				+}
			
 
				+
			
 
				+/*
				+ * Atomically clear the pending bit for @port in the shared info page,
				+ * using whichever layout (long mode or compat) is current.
				+ *
				+ * Returns -ENOTSUP when there is no shared info pointer, else 0.
				+ */
				+static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
				+{
				+    void *shared = xen_overlay_get_shinfo_ptr();
				+
				+    if (shared == NULL) {
				+        return -ENOTSUP;
				+    }
				+
				+    if (!xen_is_long_mode()) {
				+        struct compat_shared_info *csi = shared;
				+        const int word_bits = BITS_PER_BYTE * sizeof(csi->evtchn_pending[0]);
				+        typeof(csi->evtchn_pending[0]) bit = 1UL << (port % word_bits);
				+
				+        qatomic_fetch_and(&csi->evtchn_pending[port / word_bits], ~bit);
				+    } else {
				+        struct shared_info *si = shared;
				+        const int word_bits = BITS_PER_BYTE * sizeof(si->evtchn_pending[0]);
				+        typeof(si->evtchn_pending[0]) bit = 1UL << (port % word_bits);
				+
				+        qatomic_fetch_and(&si->evtchn_pending[port / word_bits], ~bit);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Return @port to the closed state, shrink s->nr_ports if the top of the
				+ * table is now closed, and clear any pending bit left for the port.
				+ */
				+static void free_port(XenEvtchnState *s, evtchn_port_t port)
				+{
				+    XenEvtchnPort *info = &s->port_table[port];
				+
				+    info->type = EVTCHNSTAT_closed;
				+    info->type_val = 0;
				+    info->vcpu = 0;
				+
				+    /* Freed the highest port: drop it and any closed ports below it. */
				+    if (s->nr_ports == port + 1) {
				+        s->nr_ports--;
				+        while (s->nr_ports &&
				+               s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed) {
				+            s->nr_ports--;
				+        }
				+    }
				+
				+    /* Clear pending event to avoid unexpected behavior on re-bind. */
				+    clear_port_pending(s, port);
				+}
			
 
				+
			
 
				+/*
				+ * Claim the lowest-numbered closed port (starting from 1), record @vcpu,
				+ * @type and @val in it, store its number in *@port and grow s->nr_ports
				+ * to cover it if needed.
				+ *
				+ * Returns 0 on success or -ENOSPC if every valid port is in use.
				+ */
				+static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
				+                         uint16_t val, evtchn_port_t *port)
				+{
				+    evtchn_port_t candidate;
				+
				+    for (candidate = 1; valid_port(candidate); candidate++) {
				+        XenEvtchnPort *slot = &s->port_table[candidate];
				+
				+        if (slot->type != EVTCHNSTAT_closed) {
				+            continue;
				+        }
				+
				+        slot->vcpu = vcpu;
				+        slot->type = type;
				+        slot->type_val = val;
				+
				+        *port = candidate;
				+
				+        if (s->nr_ports < candidate + 1) {
				+            s->nr_ports = candidate + 1;
				+        }
				+
				+        return 0;
				+    }
				+
				+    return -ENOSPC;
				+}
			
 
				+
			
 
				+/*
				+ * A VIRQ is treated as global unless it is one of VIRQ_TIMER, VIRQ_DEBUG,
				+ * VIRQ_XENOPROF or VIRQ_XENPMU.
				+ */
				+static bool virq_is_global(uint32_t virq)
				+{
				+    bool per_vcpu = virq == VIRQ_TIMER ||
				+                    virq == VIRQ_DEBUG ||
				+                    virq == VIRQ_XENOPROF ||
				+                    virq == VIRQ_XENPMU;
				+
				+    return !per_vcpu;
				+}
			
 
				+
			
 
				+/*
				+ * Tear down whatever @port is bound to and free it.
				+ *
				+ * *@flush_kvm_routes is set to true (never cleared) when a PIRQ port
				+ * marked is_translated is closed, so the caller must initialise it.
				+ * The iothread lock must be held.
				+ *
				+ * Returns -ENOENT if the port is already closed, else 0.
				+ */
				+static int close_port(XenEvtchnState *s, evtchn_port_t port,
				+                      bool *flush_kvm_routes)
				+{
				+    XenEvtchnPort *p = &s->port_table[port];
				+
				+    /* Because it *might* be a PIRQ port */
				+    assert(qemu_mutex_iothread_locked());
				+
				+    switch (p->type) {
				+    case EVTCHNSTAT_closed:
				+        return -ENOENT;
				+
				+    case EVTCHNSTAT_pirq:
				+        s->pirq[p->type_val].port = 0;
				+        if (s->pirq[p->type_val].is_translated) {
				+            *flush_kvm_routes = true;
				+        }
				+        break;
				+
				+    case EVTCHNSTAT_virq:
				+        /* Global VIRQs are always cleared on vCPU 0. */
				+        kvm_xen_set_vcpu_virq(virq_is_global(p->type_val) ? 0 : p->vcpu,
				+                              p->type_val, 0);
				+        break;
				+
				+    case EVTCHNSTAT_ipi:
				+        if (s->evtchn_in_kernel) {
				+            deassign_kernel_port(port);
				+        }
				+        break;
				+
				+    case EVTCHNSTAT_interdomain:
				+        if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
				+            uint16_t be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
				+            struct xenevtchn_handle *xc = s->be_handles[be_port];
				+            if (xc) {
				+                if (kvm_xen_has_cap(EVTCHN_SEND)) {
				+                    deassign_kernel_port(port);
				+                }
				+                xc->guest_port = 0;
				+            }
				+        } else if (!valid_port(p->type_val)) {
				+            /*
				+             * Loopback interdomain with a bogus peer: report it before
				+             * indexing port_table with the unvalidated value.
				+             */
				+            error_report("Inconsistent state for interdomain unbind");
				+        } else {
				+            /* Loopback interdomain */
				+            XenEvtchnPort *rp = &s->port_table[p->type_val];
				+
				+            if (rp->type_val != port ||
				+                rp->type != EVTCHNSTAT_interdomain) {
				+                error_report("Inconsistent state for interdomain unbind");
				+            } else {
				+                /* Set the other end back to unbound */
				+                rp->type = EVTCHNSTAT_unbound;
				+                rp->type_val = 0;
				+            }
				+        }
				+        break;
				+
				+    default:
				+        break;
				+    }
				+
				+    free_port(s, port);
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Close every port in the table and, if any closed PIRQ port asked for
				+ * it, refresh the KVM MSI routes afterwards. The iothread lock must be
				+ * held by the caller.
				+ *
				+ * Returns -ENOTSUP if there is no event channel state, else 0.
				+ */
				+int xen_evtchn_soft_reset(void)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    /* close_port() only ever sets this to true, so it must start false. */
				+    bool flush_kvm_routes = false;
				+    int i;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    assert(qemu_mutex_iothread_locked());
				+
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    /* Per-port results are ignored: already-closed ports are expected. */
				+    for (i = 0; i < s->nr_ports; i++) {
				+        close_port(s, i, &flush_kvm_routes);
				+    }
				+
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    if (flush_kvm_routes) {
				+        kvm_update_msi_routes_all(NULL, true, 0, 0);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Reset handler: performs a soft reset when reset->dom is DOMID_SELF or
				+ * xen_domid, and returns -ESRCH for any other domain.
				+ */
				+int xen_evtchn_reset_op(struct evtchn_reset *reset)
				+{
				+    bool own_domain = reset->dom == DOMID_SELF || reset->dom == xen_domid;
				+
				+    if (!own_domain) {
				+        return -ESRCH;
				+    }
				+
				+    return xen_evtchn_soft_reset();
				+}
			
 
				+
			
 
				+/*
				+ * Close handler: closes close->port under the iothread and port locks,
				+ * then refreshes the KVM MSI routes if close_port() requested it.
				+ *
				+ * Returns -ENOTSUP without event channel state, -EINVAL for an invalid
				+ * port number, otherwise the result of close_port().
				+ */
				+int xen_evtchn_close_op(struct evtchn_close *close)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    bool need_route_flush = false;
				+    int rc;
				+
				+    if (s == NULL) {
				+        return -ENOTSUP;
				+    }
				+    if (!valid_port(close->port)) {
				+        return -EINVAL;
				+    }
				+
				+    QEMU_IOTHREAD_LOCK_GUARD();
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    rc = close_port(s, close->port, &need_route_flush);
				+
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    if (need_route_flush) {
				+        kvm_update_msi_routes_all(NULL, true, 0, 0);
				+    }
				+
				+    return rc;
				+}
			
 
				+
			
 
				+/*
				+ * Unmask handler: performs a true unmask of unmask->port under the port
				+ * lock.
				+ *
				+ * Returns -ENOTSUP without event channel state, -EINVAL for an invalid
				+ * port number, otherwise the result of unmask_port().
				+ */
				+int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int rc;
				+
				+    if (s == NULL) {
				+        return -ENOTSUP;
				+    }
				+    if (!valid_port(unmask->port)) {
				+        return -EINVAL;
				+    }
				+
				+    qemu_mutex_lock(&s->port_lock);
				+    rc = unmask_port(s, unmask->port, true);
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    return rc;
				+}
			
 
				+
			
 
				+/*
				+ * Bind-vcpu handler: move vcpu->port to vCPU vcpu->vcpu.
				+ *
				+ * Only interdomain, unbound, PIRQ and global VIRQ ports can be moved;
				+ * any other port type yields -EINVAL. Also returns -ENOTSUP without
				+ * event channel state, -EINVAL for an invalid port and -ENOENT for an
				+ * unknown vCPU.
				+ */
				+int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    XenEvtchnPort *info;
				+    bool movable;
				+    int rc = -EINVAL;
				+
				+    if (s == NULL) {
				+        return -ENOTSUP;
				+    }
				+    if (!valid_port(vcpu->port)) {
				+        return -EINVAL;
				+    }
				+    if (!valid_vcpu(vcpu->vcpu)) {
				+        return -ENOENT;
				+    }
				+
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    info = &s->port_table[vcpu->port];
				+
				+    switch (info->type) {
				+    case EVTCHNSTAT_interdomain:
				+    case EVTCHNSTAT_unbound:
				+    case EVTCHNSTAT_pirq:
				+        movable = true;
				+        break;
				+    case EVTCHNSTAT_virq:
				+        movable = virq_is_global(info->type_val);
				+        break;
				+    default:
				+        movable = false;
				+        break;
				+    }
				+
				+    if (movable) {
				+        /*
				+         * unmask_port() with do_unmask==false will just raise the event
				+         * on the new vCPU if the port was already pending.
				+         */
				+        info->vcpu = vcpu->vcpu;
				+        unmask_port(s, vcpu->port, false);
				+        rc = 0;
				+    }
				+
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    return rc;
				+}
			
 
				+
			
 
				+/*
				+ * Bind-virq handler: allocate a port for virq->virq on virq->vcpu and
				+ * register it with kvm_xen_set_vcpu_virq(). The new port number is
				+ * written to virq->port; the port is freed again if registration fails.
				+ *
				+ * Returns -ENOTSUP without event channel state, -EINVAL for a VIRQ number
				+ * of NR_VIRQS or above or a global VIRQ on a vCPU other than 0, -ENOENT
				+ * for an unknown vCPU, otherwise the allocation/registration result.
				+ */
				+int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int rc;
				+
				+    if (s == NULL) {
				+        return -ENOTSUP;
				+    }
				+    if (virq->virq >= NR_VIRQS) {
				+        return -EINVAL;
				+    }
				+
				+    /* Global VIRQ must be allocated on vCPU0 first */
				+    if (virq->vcpu != 0 && virq_is_global(virq->virq)) {
				+        return -EINVAL;
				+    }
				+    if (!valid_vcpu(virq->vcpu)) {
				+        return -ENOENT;
				+    }
				+
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    rc = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
				+                       &virq->port);
				+    if (rc) {
				+        goto unlock;
				+    }
				+
				+    rc = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
				+    if (rc) {
				+        /* Registration failed: give the port back. */
				+        free_port(s, virq->port);
				+    }
				+
				+ unlock:
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    return rc;
				+}
			
 
				+
			
 
				+/*
				+ * Bind-pirq handler: allocate a port for pirq->pirq (on vCPU 0), record
				+ * it in the PIRQ table and write it to pirq->port. For an emulated-MSI
				+ * PIRQ the MSI/MSI-X vector is then unmasked if it was masked, or the KVM
				+ * MSI routes are refreshed if the PIRQ was marked is_translated.
				+ *
				+ * Returns -ENOTSUP without event channel state, -EINVAL for a PIRQ number
				+ * of s->nr_pirqs or above, -EBUSY if the PIRQ already has a port,
				+ * otherwise the allocation result.
				+ */
				+int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int rc;
				+
				+    if (s == NULL) {
				+        return -ENOTSUP;
				+    }
				+    if (pirq->pirq >= s->nr_pirqs) {
				+        return -EINVAL;
				+    }
				+
				+    QEMU_IOTHREAD_LOCK_GUARD();
				+
				+    if (s->pirq[pirq->pirq].port) {
				+        return -EBUSY;
				+    }
				+
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    rc = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
				+                       &pirq->port);
				+    if (rc == 0) {
				+        s->pirq[pirq->pirq].port = pirq->port;
				+        trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
				+    }
				+
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    if (rc) {
				+        return rc;
				+    }
				+
				+    /*
				+     * Need to do the unmask outside port_lock because it may call
				+     * back into the MSI translate function.
				+     */
				+    if (s->pirq[pirq->pirq].gsi != IRQ_MSI_EMU) {
				+        return rc;
				+    }
				+
				+    if (!s->pirq[pirq->pirq].is_masked) {
				+        /*
				+         * If KVM had attempted to translate this one before, make it try
				+         * again. If we unmasked, then the notifier on the MSI(-X) vector
				+         * will already have had the same effect.
				+         */
				+        if (s->pirq[pirq->pirq].is_translated) {
				+            kvm_update_msi_routes_all(NULL, true, 0, 0);
				+        }
				+        return rc;
				+    }
				+
				+    {
				+        PCIDevice *dev = s->pirq[pirq->pirq].dev;
				+        int vector = s->pirq[pirq->pirq].vector;
				+        char *dev_path = qdev_get_dev_path(DEVICE(dev));
				+
				+        trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
				+        g_free(dev_path);
				+
				+        if (s->pirq[pirq->pirq].is_msix) {
				+            msix_set_mask(dev, vector, false);
				+        } else {
				+            msi_set_mask(dev, vector, false, NULL);
				+        }
				+    }
				+
				+    return rc;
				+}
			
 
				+
			
 
				+/*
				+ * Bind-ipi handler: allocate an IPI port on ipi->vcpu, write it to
				+ * ipi->port and, when s->evtchn_in_kernel is set, also assign it in the
				+ * kernel (the result of that assignment is not checked).
				+ *
				+ * Returns -ENOTSUP without event channel state, -ENOENT for an unknown
				+ * vCPU, otherwise the allocation result.
				+ */
				+int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int rc;
				+
				+    if (s == NULL) {
				+        return -ENOTSUP;
				+    }
				+    if (!valid_vcpu(ipi->vcpu)) {
				+        return -ENOENT;
				+    }
				+
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    rc = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
				+    if (rc == 0 && s->evtchn_in_kernel) {
				+        assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
				+    }
				+
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    return rc;
				+}
			
 
				+
			
 
				+/*
				+ * Bind-interdomain handler: allocate a local port and connect it either
				+ * to a QEMU backend handle (remote_dom == DOMID_QEMU) or, for DOMID_SELF /
				+ * xen_domid, to another guest port that is unbound with type_val 0
				+ * (loopback). The new port is written to interdomain->local_port and is
				+ * freed again if the connection fails.
				+ *
				+ * Returns -ENOTSUP without event channel state, -ESRCH for any other
				+ * remote domain, -EINVAL for an invalid remote port or a loopback peer in
				+ * the wrong state, -ENOENT / -EBUSY when the backend handle is missing /
				+ * already bound, or the allocation error; 0 on success.
				+ */
				+int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    XenEvtchnPort *local;
				+    uint16_t initial_val;
				+    bool to_qemu;
				+    int ret;
				+
				+    if (s == NULL) {
				+        return -ENOTSUP;
				+    }
				+
				+    to_qemu = interdomain->remote_dom == DOMID_QEMU;
				+    if (to_qemu) {
				+        initial_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
				+    } else if (interdomain->remote_dom == DOMID_SELF ||
				+               interdomain->remote_dom == xen_domid) {
				+        initial_val = 0;
				+    } else {
				+        return -ESRCH;
				+    }
				+
				+    if (!valid_port(interdomain->remote_port)) {
				+        return -EINVAL;
				+    }
				+
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    /* The newly allocated port starts out as unbound */
				+    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, initial_val,
				+                        &interdomain->local_port);
				+    if (ret) {
				+        goto out;
				+    }
				+
				+    local = &s->port_table[interdomain->local_port];
				+
				+    if (to_qemu) {
				+        struct xenevtchn_handle *be = s->be_handles[interdomain->remote_port];
				+
				+        if (be == NULL) {
				+            ret = -ENOENT;
				+            goto out_free_port;
				+        }
				+        if (be->guest_port) {
				+            ret = -EBUSY;
				+            goto out_free_port;
				+        }
				+
				+        assert(be->be_port == interdomain->remote_port);
				+        be->guest_port = interdomain->local_port;
				+
				+        /*
				+         * NOTE(review): local->type is still EVTCHNSTAT_unbound here, so
				+         * that is the type handed to the kernel, and the result is not
				+         * checked. Confirm this is the intended type for the attribute.
				+         */
				+        if (kvm_xen_has_cap(EVTCHN_SEND)) {
				+            assign_kernel_eventfd(local->type, be->guest_port, be->fd);
				+        }
				+
				+        local->type = EVTCHNSTAT_interdomain;
				+        local->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU |
				+                          interdomain->remote_port;
				+        ret = 0;
				+    } else {
				+        /* Loopback */
				+        XenEvtchnPort *remote = &s->port_table[interdomain->remote_port];
				+
				+        if (remote->type != EVTCHNSTAT_unbound || remote->type_val != 0) {
				+            ret = -EINVAL;
				+        } else {
				+            /* It's a match! Point the two ends at each other. */
				+            remote->type = EVTCHNSTAT_interdomain;
				+            remote->type_val = interdomain->local_port;
				+
				+            local->type = EVTCHNSTAT_interdomain;
				+            local->type_val = interdomain->remote_port;
				+        }
				+    }
				+
				+ out_free_port:
				+    if (ret) {
				+        free_port(s, interdomain->local_port);
				+    }
				+ out:
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    return ret;
				+}
			
 
				+/*
				+ * Allocate a new unbound port for the guest.
				+ *
				+ * The port is tagged with who is allowed to bind to it later: QEMU acting
				+ * as a backend (PORT_INFO_TYPEVAL_REMOTE_QEMU) or the guest itself (0).
				+ *
				+ * Returns -ENOTSUP when event channel emulation is not instantiated,
				+ * -ESRCH when alloc->dom is not this guest, -EPERM when alloc->remote_dom
				+ * is neither QEMU nor this guest, and otherwise whatever allocate_port()
				+ * returns (the new port number is stored through &alloc->port).
				+ */
				+int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    uint16_t remote_tag;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    /* Only the guest's own port table can be allocated from. */
				+    if (!(alloc->dom == DOMID_SELF || alloc->dom == xen_domid)) {
				+        return -ESRCH;
				+    }
				+
				+    if (alloc->remote_dom == DOMID_QEMU) {
				+        remote_tag = PORT_INFO_TYPEVAL_REMOTE_QEMU;
				+    } else if (alloc->remote_dom != DOMID_SELF &&
				+               alloc->remote_dom != xen_domid) {
				+        return -EPERM;
				+    } else {
				+        remote_tag = 0;
				+    }
				+
				+    /* The guard drops port_lock when the function returns. */
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    return allocate_port(s, 0, EVTCHNSTAT_unbound, remote_tag, &alloc->port);
				+}
			
 
				+
			
 
				+/*
				+ * Handle a guest request to signal one of its own ports.
				+ *
				+ * - interdomain port whose peer is QEMU: kick the backend's eventfd
				+ *   (-ENOENT if no backend handle is registered for that backend port).
				+ * - interdomain loopback port: mark the peer port (type_val) pending.
				+ * - IPI port: mark the port itself pending.
				+ * - unbound port: silently ignored.
				+ * - anything else: -EINVAL.
				+ *
				+ * Returns -ENOTSUP when event channel emulation is not instantiated and
				+ * -EINVAL for an out-of-range port number.
				+ */
				+int xen_evtchn_send_op(struct evtchn_send *send)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    struct xenevtchn_handle *backend;
				+    XenEvtchnPort *p;
				+    uint16_t be_port;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    if (!valid_port(send->port)) {
				+        return -EINVAL;
				+    }
				+
				+    /* Held until we return, whichever path is taken below. */
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    p = &s->port_table[send->port];
				+
				+    if (p->type == EVTCHNSTAT_ipi) {
				+        set_port_pending(s, send->port);
				+        return 0;
				+    }
				+
				+    if (p->type == EVTCHNSTAT_unbound) {
				+        /* Xen will silently drop these */
				+        return 0;
				+    }
				+
				+    if (p->type != EVTCHNSTAT_interdomain) {
				+        return -EINVAL;
				+    }
				+
				+    if (!(p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU)) {
				+        /* Loopback interdomain ports; just a complex IPI */
				+        set_port_pending(s, p->type_val);
				+        return 0;
				+    }
				+
				+    /*
				+     * This is an event from the guest to qemu itself, which is
				+     * serving as the driver domain.
				+     */
				+    be_port = p->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
				+    backend = s->be_handles[be_port];
				+    if (!backend) {
				+        return -ENOENT;
				+    }
				+
				+    eventfd_write(backend->fd, 1);
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Raise an event on a guest port on behalf of QEMU itself.
				+ *
				+ * Only VIRQ ports and interdomain ports whose remote end is QEMU may be
				+ * signalled this way; any other port type yields -EINVAL.  Returns
				+ * -ENOTSUP when event channel emulation is not instantiated, -EINVAL for
				+ * an out-of-range port, and 0 once the port has been marked pending.
				+ */
				+int xen_evtchn_set_port(uint16_t port)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    XenEvtchnPort *p;
				+    bool qemu_may_send;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    if (!valid_port(port)) {
				+        return -EINVAL;
				+    }
				+
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    p = &s->port_table[port];
				+
				+    /* QEMU has no business sending to anything but these */
				+    qemu_may_send = p->type == EVTCHNSTAT_virq ||
				+                    (p->type == EVTCHNSTAT_interdomain &&
				+                     (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU));
				+    if (!qemu_may_send) {
				+        return -EINVAL;
				+    }
				+
				+    set_port_pending(s, port);
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Pick a free PIRQ number, mark it in use and record its GSI binding.
				+ *
				+ * @type: MAP_PIRQ_TYPE_GSI requests a PIRQ from the low range first;
				+ *        every other type is served from the top of the table downwards.
				+ * @gsi:  GSI to associate with the PIRQ, or a negative IRQ_* state such
				+ *        as IRQ_UNBOUND when there is no GSI.
				+ *
				+ * Returns the allocated PIRQ number, or -ENOSPC when the table is full.
				+ * The callers visible in this file hold s->port_lock around the call.
				+ */
				+static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
				+{
				+    uint16_t pirq;
				+
				+    /*
				+     * Preserve the allocation strategy that Xen has. It looks like
				+     * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
				+     * to GSIs (counting up from 16), and then we count backwards from
				+     * the top for MSIs or when the GSI space is exhausted.
				+     */
				+    if (type == MAP_PIRQ_TYPE_GSI) {
				+        for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
				+            if (!pirq_inuse(s, pirq)) {
				+                goto found;
				+            }
				+        }
				+    }
				+
				+    for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
				+        /*
				+         * Skip whole words at a time when they're full: rewind to the
				+         * first bit of this word so that the loop's decrement lands on
				+         * the last bit of the previous word.
				+         */
				+        if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
				+            pirq &= ~63ULL;
				+            continue;
				+        }
				+        if (!pirq_inuse(s, pirq)) {
				+            goto found;
				+        }
				+    }
				+    return -ENOSPC;
				+
				+ found:
				+    pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
				+    if (gsi >= 0) {
				+        /*
				+         * Valid GSIs are 0 .. IOAPIC_NUM_PINS - 1 (the same bound that
				+         * xen_physdev_map_pirq() enforces before indexing gsi_pirq[]),
				+         * so the limit itself must be rejected too.
				+         */
				+        assert(gsi < IOAPIC_NUM_PINS);
				+        s->gsi_pirq[gsi] = pirq;
				+    }
				+    s->pirq[pirq].gsi = gsi;
				+    return pirq;
				+}
			
 
				+
			
 
				+/*
				+ * Route a change of level on a GSI to the event channel bound to the PIRQ
				+ * which the guest mapped to that GSI, if there is one.
				+ *
				+ * Must be called with the iothread lock held (asserted below).
				+ *
				+ * Returns true when the GSI is mapped to a PIRQ and was therefore handled
				+ * here; false when emulation is not instantiated, the GSI is out of range,
				+ * it is the event channel callback GSI, or no PIRQ is mapped to it.
				+ */
				+bool xen_evtchn_set_gsi(int gsi, int level)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int pirq;
				+
				+    assert(qemu_mutex_iothread_locked());
				+
				+    /*
				+     * Valid GSIs are 0 .. IOAPIC_NUM_PINS - 1; the limit itself must be
				+     * rejected or gsi_pirq[] and the pirq_gsi_set bitmask below would be
				+     * indexed one past the last pin.
				+     */
				+    if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
				+        return false;
				+    }
				+
				+    /*
				+     * Check that it *isn't* the event channel GSI, and thus
				+     * that we are not recursing and it's safe to take s->port_lock.
				+     *
				+     * Locking aside, it's perfectly sane to bail out early for that
				+     * special case, as it would make no sense for the event channel
				+     * GSI to be routed back to event channels, when the delivery
				+     * method is to raise the GSI... that recursion wouldn't *just*
				+     * be a locking issue.
				+     */
				+    if (gsi && gsi == s->callback_gsi) {
				+        return false;
				+    }
				+
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    /* A zero entry means that no PIRQ is mapped to this GSI. */
				+    pirq = s->gsi_pirq[gsi];
				+    if (!pirq) {
				+        return false;
				+    }
				+
				+    if (level) {
				+        int port = s->pirq[pirq].port;
				+
				+        /* Remember that the line is asserted, then signal the guest. */
				+        s->pirq_gsi_set |= (1U << gsi);
				+        if (port) {
				+            set_port_pending(s, port);
				+        }
				+    } else {
				+        s->pirq_gsi_set &= ~(1U << gsi);
				+    }
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * Decode the PIRQ number carried by an MSI address/data pair.
				+ *
				+ * A message only targets a PIRQ when the vector (low 8 bits of @data) is
				+ * zero.  The PIRQ number is then assembled from address bits 12-19 (its
				+ * low 8 bits) and bits 8-31 of the upper 32 bits of the address.
				+ *
				+ * Returns the PIRQ number, or 0 when the vector is non-zero.
				+ */
				+static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
				+{
				+    uint32_t low_bits, high_bits;
				+
				+    /* The vector (in low 8 bits of data) must be zero */
				+    if ((data & 0xff) != 0) {
				+        return 0;
				+    }
				+
				+    low_bits = (uint32_t)((addr >> 12) & 0xff);
				+    high_bits = (uint32_t)(addr >> 32) & 0xffffff00;
				+
				+    return high_bits | low_bits;
				+}
			
 
				+
			
 
				+/*
				+ * Reset every PIRQ entry that refers to @dev (and, unless @vector is -1,
				+ * to that particular vector) back to the IRQ_UNBOUND state.
				+ *
				+ * @except_pirq: a PIRQ to leave untouched, or 0 for none.
				+ */
				+static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
				+                                 int except_pirq)
				+{
				+    uint32_t pirq;
				+
				+    for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
				+        /*
				+         * We could be cleverer here, but it isn't really a fast path, and
				+         * this trivial optimisation is enough to let us skip the big gap
				+         * in the middle a bit quicker (in terms of both loop iterations,
				+         * and cache lines).
				+         *
				+         * Advance by 63 only: the loop's own increment supplies the last
				+         * step, so the next iteration starts on the first entry of the
				+         * following word instead of stepping over it.
				+         */
				+        if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
				+            pirq += 63;
				+            continue;
				+        }
				+        if (except_pirq && pirq == except_pirq) {
				+            continue;
				+        }
				+        if (s->pirq[pirq].dev != dev) {
				+            continue;
				+        }
				+        if (vector != -1 && s->pirq[pirq].vector != vector) {
				+            continue;
				+        }
				+
				+        /*
				+         * It could theoretically be bound to a port already, but that is OK.
				+         *
				+         * NOTE(review): .dev already equals dev at this point, so the
				+         * assignment below is a no-op and the entry keeps pointing at the
				+         * device -- confirm whether NULL was intended.
				+         */
				+        s->pirq[pirq].dev = dev;
				+        s->pirq[pirq].gsi = IRQ_UNBOUND;
				+        s->pirq[pirq].is_msix = false;
				+        s->pirq[pirq].vector = 0;
				+        s->pirq[pirq].is_masked = false;
				+        s->pirq[pirq].is_translated = false;
				+    }
				+}
			
 
				+
			
 
				+/*
				+ * Forget every PIRQ entry that refers to @dev, whatever its vector.
				+ * Does nothing when event channel emulation is not instantiated.
				+ */
				+void xen_evtchn_remove_pci_device(PCIDevice *dev)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+
				+    if (s) {
				+        QEMU_LOCK_GUARD(&s->port_lock);
				+        /* -1: match any vector; 0: no PIRQ is exempt. */
				+        do_remove_pci_vector(s, dev, -1, 0);
				+    }
				+}
			
 
				+
			
 
				+/*
				+ * Inspect an MSI message programmed into {@dev, @vector} and, when it
				+ * targets a usable PIRQ, record the device/vector details against that
				+ * PIRQ.  Any other PIRQ entry previously recorded for the same
				+ * {device, vector} is reset.
				+ *
				+ * Must be called with the iothread lock held (asserted below).
				+ */
				+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
				+                          uint64_t addr, uint32_t data, bool is_masked)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    uint32_t pirq;
				+    bool usable;
				+
				+    if (!s) {
				+        return;
				+    }
				+
				+    assert(qemu_mutex_iothread_locked());
				+
				+    pirq = msi_pirq_target(addr, data);
				+
				+    /*
				+     * The PIRQ# must be sane, and there must be an allocated PIRQ in
				+     * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
				+     */
				+    usable = pirq && pirq < s->nr_pirqs && pirq_inuse(s, pirq) &&
				+             (s->pirq[pirq].gsi == IRQ_UNBOUND ||
				+              s->pirq[pirq].gsi == IRQ_MSI_EMU);
				+
				+    if (usable) {
				+        s->pirq[pirq].dev = dev;
				+        s->pirq[pirq].gsi = IRQ_MSI_EMU;
				+        s->pirq[pirq].is_msix = is_msix;
				+        s->pirq[pirq].vector = vector;
				+        s->pirq[pirq].is_masked = is_masked;
				+    } else {
				+        /* Zero means "no PIRQ to preserve" in the cleanup below. */
				+        pirq = 0;
				+    }
				+
				+    /* Remove any (other) entries for this {device, vector} */
				+    do_remove_pci_vector(s, dev, vector, pirq);
				+}
			
 
				+
			
 
				+/*
				+ * Try to turn an MSI route into a KVM Xen event channel route.
				+ *
				+ * Must be called with the iothread lock held (asserted below).
				+ *
				+ * Returns:
				+ *   1        the message does not target a PIRQ (or emulation is off),
				+ *            so the caller should treat it as an ordinary MSI;
				+ *   0        @route has been filled in as KVM_IRQ_ROUTING_XEN_EVTCHN;
				+ *   -ENOTSUP the EVTCHN_2LEVEL capability is not available;
				+ *   -EINVAL  the PIRQ is not in IRQ_MSI_EMU state, is not bound to a
				+ *            valid port, or the port's vCPU cannot be found.
				+ */
				+int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
				+                                  uint64_t address, uint32_t data)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    uint32_t pirq, evtchn;
				+    CPUState *target_cpu;
				+
				+    if (!s) {
				+        return 1; /* Not a PIRQ */
				+    }
				+
				+    assert(qemu_mutex_iothread_locked());
				+
				+    pirq = msi_pirq_target(address, data);
				+    if (pirq == 0 || pirq >= s->nr_pirqs) {
				+        return 1; /* Not a PIRQ */
				+    }
				+
				+    if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
				+        return -ENOTSUP;
				+    }
				+
				+    if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
				+        return -EINVAL;
				+    }
				+
				+    /* Remember that KVM tried to translate this. It might need to try again. */
				+    s->pirq[pirq].is_translated = true;
				+
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    evtchn = s->pirq[pirq].port;
				+    if (!valid_port(evtchn)) {
				+        return -EINVAL;
				+    }
				+
				+    target_cpu = qemu_get_cpu(s->port_table[evtchn].vcpu);
				+    if (target_cpu == NULL) {
				+        return -EINVAL;
				+    }
				+
				+    route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
				+    route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
				+    route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(target_cpu);
				+    route->u.xen_evtchn.port = evtchn;
				+
				+    return 0; /* Handled */
				+}
			
 
				+
			
 
				+/*
				+ * Deliver an MSI which targets a PIRQ by raising the event channel port
				+ * that the PIRQ is bound to.
				+ *
				+ * Must be called with the iothread lock held (asserted below).
				+ *
				+ * Returns true when the message was consumed here; false when emulation
				+ * is off, the message does not target a PIRQ within range, or the PIRQ
				+ * is not bound to a valid port.
				+ */
				+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    uint32_t pirq, evtchn;
				+
				+    if (!s) {
				+        return false;
				+    }
				+
				+    assert(qemu_mutex_iothread_locked());
				+
				+    pirq = msi_pirq_target(address, data);
				+    if (pirq == 0 || pirq >= s->nr_pirqs) {
				+        return false;
				+    }
				+
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    evtchn = s->pirq[pirq].port;
				+    if (valid_port(evtchn)) {
				+        set_port_pending(s, evtchn);
				+        return true;
				+    }
				+
				+    return false;
				+}
			
 
				+
			
 
				+/*
				+ * Map a GSI to a PIRQ on behalf of the guest.
				+ *
				+ * When map->pirq is negative a PIRQ is allocated and written back to
				+ * map->pirq; otherwise the requested PIRQ is used, provided that it is
				+ * within the table and is either unallocated or allocated but unbound.
				+ *
				+ * Returns 0 on success, -ENOTSUP when emulation is not instantiated,
				+ * -EPERM for a foreign domid, -EINVAL for a non-GSI map type, an
				+ * out-of-range GSI or an out-of-range PIRQ, -EBUSY when the requested
				+ * PIRQ is already bound, or the error from allocate_pirq().
				+ */
				+int xen_physdev_map_pirq(struct physdev_map_pirq *map)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int pirq = map->pirq;
				+    int gsi = map->index;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    QEMU_IOTHREAD_LOCK_GUARD();
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    if (map->domid != DOMID_SELF && map->domid != xen_domid) {
				+        return -EPERM;
				+    }
				+    if (map->type != MAP_PIRQ_TYPE_GSI) {
				+        return -EINVAL;
				+    }
				+    if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
				+        return -EINVAL;
				+    }
				+
				+    if (pirq < 0) {
				+        pirq = allocate_pirq(s, map->type, gsi);
				+        if (pirq < 0) {
				+            return pirq;
				+        }
				+        map->pirq = pirq;
				+    } else if (pirq >= s->nr_pirqs) {
				+        /*
				+         * Valid PIRQs are 0 .. nr_pirqs - 1; nr_pirqs itself would index
				+         * one entry past the end of the PIRQ table and in-use bitmap.
				+         */
				+        return -EINVAL;
				+    } else {
				+        /*
				+         * User specified a valid-looking PIRQ#. Allow it if it is
				+         * allocated and not yet bound, or if it is unallocated
				+         */
				+        if (pirq_inuse(s, pirq)) {
				+            if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
				+                return -EBUSY;
				+            }
				+        } else {
				+            /* If it was unused, mark it used now. */
				+            pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
				+        }
				+        /* Set the mapping in both directions. */
				+        s->pirq[pirq].gsi = gsi;
				+        s->gsi_pirq[gsi] = pirq;
				+    }
				+
				+    trace_kvm_xen_map_pirq(pirq, gsi);
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Undo a GSI-to-PIRQ mapping and release the PIRQ.
				+ *
				+ * Returns 0 on success, -ENOTSUP when emulation is not instantiated,
				+ * -EPERM for a foreign domid, -EINVAL for an out-of-range PIRQ or one
				+ * that is not mapped to a GSI, and -ENOENT for a PIRQ that is not in use.
				+ */
				+int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int pirq = unmap->pirq;
				+    int gsi;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
				+        return -EPERM;
				+    }
				+    if (pirq < 0 || pirq >= s->nr_pirqs) {
				+        return -EINVAL;
				+    }
				+
				+    /*
				+     * port_lock is taken and dropped by hand (not with a guard) so that
				+     * it is already released when kvm_update_msi_routes_all() is called
				+     * below; the iothread lock guard stays in force until return.
				+     */
				+    QEMU_IOTHREAD_LOCK_GUARD();
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    if (!pirq_inuse(s, pirq)) {
				+        qemu_mutex_unlock(&s->port_lock);
				+        return -ENOENT;
				+    }
				+
				+    gsi = s->pirq[pirq].gsi;
				+
				+    /* We can only unmap GSI PIRQs */
				+    if (gsi < 0) {
				+        qemu_mutex_unlock(&s->port_lock);
				+        return -EINVAL;
				+    }
				+
				+    /* Clear the mapping in both directions, then free the PIRQ itself. */
				+    s->gsi_pirq[gsi] = 0;
				+    s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
				+    pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
				+
				+    trace_kvm_xen_unmap_pirq(pirq, gsi);
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    /*
				+     * NOTE(review): gsi is known to be >= 0 here, so this branch can only
				+     * be taken if IRQ_MSI_EMU is non-negative -- confirm against its
				+     * definition; it may be unreachable.
				+     */
				+    if (gsi == IRQ_MSI_EMU) {
				+        kvm_update_msi_routes_all(NULL, true, 0, 0);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Handle the guest's end-of-interrupt for a PIRQ: if the GSI behind the
				+ * PIRQ is still asserted, raise the bound event channel port again.
				+ *
				+ * Returns 0 on success, -ENOTSUP when emulation is not instantiated,
				+ * -EINVAL for an out-of-range PIRQ or one that is not mapped to a GSI,
				+ * and -ENOENT for a PIRQ that is not in use.
				+ */
				+int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int pirq = eoi->irq;
				+    int gsi;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    /*
				+     * The PIRQ number comes straight from the guest; reject anything
				+     * outside the table before it is used as an index, as
				+     * xen_physdev_unmap_pirq() does.
				+     */
				+    if (pirq < 0 || pirq >= s->nr_pirqs) {
				+        return -EINVAL;
				+    }
				+
				+    QEMU_IOTHREAD_LOCK_GUARD();
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    if (!pirq_inuse(s, pirq)) {
				+        return -ENOENT;
				+    }
				+
				+    gsi = s->pirq[pirq].gsi;
				+    if (gsi < 0) {
				+        return -EINVAL;
				+    }
				+
				+    /* Reassert a level IRQ if needed */
				+    if (s->pirq_gsi_set & (1U << gsi)) {
				+        int port = s->pirq[pirq].port;
				+        if (port) {
				+            set_port_pending(s, port);
				+        }
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Report whether a PIRQ needs an explicit EOI from the guest.
				+ *
				+ * query->flags is set to XENIRQSTAT_needs_eoi when the PIRQ is mapped to
				+ * a GSI, and to 0 otherwise.
				+ *
				+ * Returns 0 on success, -ENOTSUP when emulation is not instantiated,
				+ * -EINVAL for an out-of-range PIRQ, and -ENOENT for a PIRQ that is not
				+ * in use.
				+ */
				+int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int pirq = query->irq;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    /*
				+     * The PIRQ number comes straight from the guest; reject anything
				+     * outside the table before it is used as an index, as
				+     * xen_physdev_unmap_pirq() does.
				+     */
				+    if (pirq < 0 || pirq >= s->nr_pirqs) {
				+        return -EINVAL;
				+    }
				+
				+    QEMU_IOTHREAD_LOCK_GUARD();
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    if (!pirq_inuse(s, pirq)) {
				+        return -ENOENT;
				+    }
				+
				+    if (s->pirq[pirq].gsi >= 0) {
				+        query->flags = XENIRQSTAT_needs_eoi;
				+    } else {
				+        query->flags = 0;
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Allocate an unbound PIRQ of the requested type for the guest and
				+ * return its number in get->pirq.
				+ *
				+ * Returns 0 on success, -ENOTSUP when emulation is not instantiated, or
				+ * the negative error from allocate_pirq().
				+ */
				+int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    int allocated;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    allocated = allocate_pirq(s, get->type, IRQ_UNBOUND);
				+    if (allocated >= 0) {
				+        get->pirq = allocated;
				+        trace_kvm_xen_get_free_pirq(allocated, get->type);
				+        return 0;
				+    }
				+
				+    return allocated;
				+}
			
 
				+
			
 
				+/*
				+ * Create a backend event channel handle backed by a fresh eventfd.
				+ *
				+ * Returns the new handle, or NULL when the eventfd cannot be created.
				+ * The handle is not bound to any port yet.
				+ */
				+struct xenevtchn_handle *xen_be_evtchn_open(void)
				+{
				+    struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
				+
				+    xc->fd = eventfd(0, EFD_CLOEXEC);
				+    if (xc->fd < 0) {
				+        /* Allocated with g_new0(), so release it with g_free(). */
				+        g_free(xc);
				+        return NULL;
				+    }
				+
				+    return xc;
				+}
			
 
				+
			
 
				+/*
				+ * Claim the lowest free backend port number (starting from 1) for @xc.
				+ *
				+ * On success the handle is stored in s->be_handles[], xc->be_port is set
				+ * and the port number is returned.  Returns 0 when every slot is taken.
				+ */
				+static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
				+{
				+    int slot = 1;
				+
				+    while (slot < EVTCHN_2L_NR_CHANNELS && s->be_handles[slot]) {
				+        slot++;
				+    }
				+
				+    if (slot >= EVTCHN_2L_NR_CHANNELS) {
				+        return 0;
				+    }
				+
				+    s->be_handles[slot] = xc;
				+    xc->be_port = slot;
				+    return slot;
				+}
			
 
				+
			
 
				+/*
				+ * Bind a backend handle to a port which the guest has offered.
				+ *
				+ * Returns the backend port number on success, -ENOTSUP when emulation is
				+ * not instantiated, -EFAULT for a NULL handle, -ESRCH when @domid is not
				+ * this guest, -EINVAL for an out-of-range guest port or one that is
				+ * neither unbound nor interdomain, and -ENOSPC when no backend port
				+ * number is free.
				+ */
				+int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
				+                                   evtchn_port_t guest_port)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    XenEvtchnPort *gp;
				+    uint16_t be_port = 0;
				+    int ret;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    if (!xc) {
				+        return -EFAULT;
				+    }
				+
				+    if (domid != xen_domid) {
				+        return -ESRCH;
				+    }
				+
				+    if (!valid_port(guest_port)) {
				+        return -EINVAL;
				+    }
				+
				+    qemu_mutex_lock(&s->port_lock);
				+
				+    /* The guest has to have an unbound port waiting for us to bind */
				+    gp = &s->port_table[guest_port];
				+
				+    switch (gp->type) {
				+    case EVTCHNSTAT_interdomain:
				+        /* Allow rebinding after migration, preserve port # if possible */
				+        /*
				+         * NOTE(review): type_val is not checked for the
				+         * PORT_INFO_TYPEVAL_REMOTE_QEMU flag here, so a loopback
				+         * interdomain port appears to be accepted too -- confirm.
				+         */
				+        be_port = gp->type_val & ~PORT_INFO_TYPEVAL_REMOTE_QEMU;
				+        assert(be_port != 0);
				+        if (!s->be_handles[be_port]) {
				+            s->be_handles[be_port] = xc;
				+            xc->guest_port = guest_port;
				+            ret = xc->be_port = be_port;
				+            if (kvm_xen_has_cap(EVTCHN_SEND)) {
				+                assign_kernel_eventfd(gp->type, guest_port, xc->fd);
				+            }
				+            break;
				+        }
				+        /* The remembered backend port is taken: pick a new one below. */
				+        /* fall through */
				+
				+    case EVTCHNSTAT_unbound:
				+        /* find_be_port() also records xc in be_handles[] and sets be_port. */
				+        be_port = find_be_port(s, xc);
				+        if (!be_port) {
				+            ret = -ENOSPC;
				+            goto out;
				+        }
				+
				+        gp->type = EVTCHNSTAT_interdomain;
				+        gp->type_val = be_port | PORT_INFO_TYPEVAL_REMOTE_QEMU;
				+        xc->guest_port = guest_port;
				+        if (kvm_xen_has_cap(EVTCHN_SEND)) {
				+            assign_kernel_eventfd(gp->type, guest_port, xc->fd);
				+        }
				+        ret = be_port;
				+        break;
				+
				+    default:
				+        ret = -EINVAL;
				+        break;
				+    }
				+
				+ out:
				+    qemu_mutex_unlock(&s->port_lock);
				+
				+    return ret;
				+}
			
 
				+
			
 
				+/*
				+ * Detach a backend handle from the guest port it is bound to and release
				+ * its backend port number.
				+ *
				+ * @port: the backend port expected to be bound, or 0 to skip that check.
				+ *
				+ * Returns 0 on success, -ENOTSUP when emulation is not instantiated,
				+ * -EFAULT for a NULL handle, and -EINVAL when a non-zero @port does not
				+ * match the handle's backend port.
				+ */
				+int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    if (!xc) {
				+        return -EFAULT;
				+    }
				+
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    if (port != 0 && port != xc->be_port) {
				+        return -EINVAL;
				+    }
				+
				+    if (xc->guest_port) {
				+        XenEvtchnPort *guest = &s->port_table[xc->guest_port];
				+
				+        /* This should never *not* be true */
				+        if (guest->type == EVTCHNSTAT_interdomain) {
				+            /* Back to "unbound, waiting for QEMU" so it can be rebound. */
				+            guest->type = EVTCHNSTAT_unbound;
				+            guest->type_val = PORT_INFO_TYPEVAL_REMOTE_QEMU;
				+        }
				+
				+        if (kvm_xen_has_cap(EVTCHN_SEND)) {
				+            deassign_kernel_port(xc->guest_port);
				+        }
				+        xc->guest_port = 0;
				+    }
				+
				+    s->be_handles[xc->be_port] = NULL;
				+    xc->be_port = 0;
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Unbind a backend handle, close its eventfd and free it.
				+ *
				+ * Returns 0, or -EFAULT for a NULL handle.  The result of the unbind is
				+ * deliberately ignored: the handle is torn down regardless.
				+ */
				+int xen_be_evtchn_close(struct xenevtchn_handle *xc)
				+{
				+    if (!xc) {
				+        return -EFAULT;
				+    }
				+
				+    xen_be_evtchn_unbind(xc, 0);
				+
				+    close(xc->fd);
				+    /* The handle comes from g_new0() in xen_be_evtchn_open(). */
				+    g_free(xc);
				+    return 0;
				+}
			
 
				+
			
 
				+/* Return the eventfd behind a backend handle, or -1 for a NULL handle. */
				+int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
				+{
				+    return xc ? xc->fd : -1;
				+}
			
 
				+
			
 
				+/*
				+ * Signal the guest port that a backend handle is bound to.
				+ *
				+ * Returns 0 on success, -ENOTSUP when emulation is not instantiated,
				+ * -EFAULT for a NULL handle, and -ENOTCONN when the handle is not bound
				+ * to a guest port.  @port is not consulted.
				+ */
				+int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    if (!xc) {
				+        return -EFAULT;
				+    }
				+
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    if (!xc->guest_port) {
				+        return -ENOTCONN;
				+    }
				+
				+    set_port_pending(s, xc->guest_port);
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Consume a pending notification on a backend handle.
				+ *
				+ * Returns the handle's backend port when the eventfd counter read back
				+ * non-zero, 0 when the handle has no backend port or the counter was
				+ * zero, -EFAULT for a NULL handle, and -errno when reading the eventfd
				+ * fails.
				+ */
				+int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
				+{
				+    uint64_t count;
				+
				+    if (!xc) {
				+        return -EFAULT;
				+    }
				+
				+    if (xc->be_port == 0) {
				+        return 0;
				+    }
				+
				+    if (eventfd_read(xc->fd, &count) != 0) {
				+        return -errno;
				+    }
				+
				+    if (count == 0) {
				+        return 0;
				+    }
				+
				+    return xc->be_port;
				+}
			
 
				+
			
 
				+/*
				+ * Validate an unmask request for a backend handle.
				+ *
				+ * Returns 0 when @port is the handle's backend port, -EINVAL when it is
				+ * not, and -EFAULT for a NULL handle.
				+ */
				+int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
				+{
				+    if (!xc) {
				+        return -EFAULT;
				+    }
				+
				+    /*
				+     * We don't actually do anything to unmask it; the event was already
				+     * consumed in xen_be_evtchn_pending().
				+     */
				+    return xc->be_port == port ? 0 : -EINVAL;
				+}
			
 
				+
			
 
				+/*
				+ * Return the guest port that a backend handle is bound to (0 when it is
				+ * not bound), or -EFAULT for a NULL handle, matching the other
				+ * xen_be_evtchn_*() accessors.
				+ */
				+int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
				+{
				+    if (!xc) {
				+        return -EFAULT;
				+    }
				+
				+    return xc->guest_port;
				+}
			
 
				+
			
 
				+/*
				+ * QMP handler: build a list describing every port that is not closed.
				+ *
				+ * Each entry carries the port number, type, target, vCPU, and the port's
				+ * pending and mask bits as read from the guest's shared info page.
				+ *
				+ * Returns the list (NULL when no port is open).  Sets @errp and returns
				+ * NULL when emulation is not enabled or the shared info page is not
				+ * allocated.
				+ */
				+EvtchnInfoList *qmp_xen_event_list(Error **errp)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+    EvtchnInfoList *head = NULL, **tail = &head;
				+    void *shinfo, *pending, *mask;
				+    int port;
				+
				+    if (!s) {
				+        error_setg(errp, "Xen event channel emulation not enabled");
				+        return NULL;
				+    }
				+
				+    shinfo = xen_overlay_get_shinfo_ptr();
				+    if (!shinfo) {
				+        error_setg(errp, "Xen shared info page not allocated");
				+        return NULL;
				+    }
				+
				+    /* The bitmaps sit at different offsets in the compat layout. */
				+    if (!xen_is_long_mode()) {
				+        pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
				+        mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
				+    } else {
				+        pending = shinfo + offsetof(struct shared_info, evtchn_pending);
				+        mask = shinfo + offsetof(struct shared_info, evtchn_mask);
				+    }
				+
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    /* info->type below is copied verbatim, so the two enums must agree. */
				+    qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
				+    qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
				+    qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
				+    qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
				+    qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
				+    qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);
				+
				+    for (port = 0; port < s->nr_ports; port++) {
				+        XenEvtchnPort *p = &s->port_table[port];
				+        EvtchnInfo *info;
				+
				+        if (p->type == EVTCHNSTAT_closed) {
				+            continue;
				+        }
				+
				+        info = g_new0(EvtchnInfo, 1);
				+        info->port = port;
				+        info->type = p->type;
				+
				+        if (p->type != EVTCHNSTAT_interdomain) {
				+            info->target = p->type_val;
				+        } else {
				+            if (p->type_val & PORT_INFO_TYPEVAL_REMOTE_QEMU) {
				+                info->remote_domain = g_strdup("qemu");
				+            } else {
				+                info->remote_domain = g_strdup("loopback");
				+            }
				+            info->target = p->type_val & PORT_INFO_TYPEVAL_REMOTE_PORT_MASK;
				+        }
				+
				+        info->vcpu = p->vcpu;
				+        info->pending = test_bit(port, pending);
				+        info->masked = test_bit(port, mask);
				+
				+        QAPI_LIST_APPEND(tail, info);
				+    }
				+
				+    return head;
				+}
			
 
				+
			
 
				+/*
				+ * QMP handler: mark a guest port pending.
				+ *
				+ * Sets @errp when emulation is not enabled, when @port is out of range,
				+ * or when set_port_pending() reports a failure.
				+ */
				+void qmp_xen_event_inject(uint32_t port, Error **errp)
				+{
				+    XenEvtchnState *s = xen_evtchn_singleton;
				+
				+    if (!s) {
				+        error_setg(errp, "Xen event channel emulation not enabled");
				+        return;
				+    }
				+
				+    if (!valid_port(port)) {
				+        error_setg(errp, "Invalid port %u", port);
				+        /*
				+         * Stop here: carrying on would hand an out-of-range port to
				+         * set_port_pending() and could set @errp a second time.
				+         */
				+        return;
				+    }
				+
				+    QEMU_LOCK_GUARD(&s->port_lock);
				+
				+    if (set_port_pending(s, port)) {
				+        error_setg(errp, "Failed to set port %u", port);
				+        return;
				+    }
				+}
			
 
				+
			
 
				+/*
				+ * HMP handler: print one line per open event channel port, using the
				+ * list built by qmp_xen_event_list().
				+ */
				+void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
				+{
				+    Error *err = NULL;
				+    EvtchnInfoList *info_list = qmp_xen_event_list(&err);
				+    EvtchnInfoList *iter;
				+
				+    if (err) {
				+        hmp_handle_error(mon, err);
				+        return;
				+    }
				+
				+    iter = info_list;
				+    while (iter) {
				+        EvtchnInfo *info = iter->value;
				+        bool has_target = info->type != EVTCHN_PORT_TYPE_IPI;
				+
				+        monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
				+                       EvtchnPortType_str(info->type));
				+
				+        /* Every type except IPI is followed by "([remote:]target)". */
				+        if (has_target) {
				+            monitor_printf(mon,  "(");
				+            if (info->remote_domain) {
				+                monitor_printf(mon, "%s:", info->remote_domain);
				+            }
				+            monitor_printf(mon, "%d)", info->target);
				+        }
				+
				+        if (info->pending) {
				+            monitor_printf(mon, " PENDING");
				+        }
				+        if (info->masked) {
				+            monitor_printf(mon, " MASKED");
				+        }
				+        monitor_printf(mon, "\n");
				+
				+        iter = iter->next;
				+    }
				+
				+    qapi_free_EvtchnInfoList(info_list);
				+}
			
 
				+
			
 
				+/*
				+ * HMP handler: inject an event on the port named by the "port" argument
				+ * and report either the error or a confirmation on the monitor.
				+ */
				+void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
				+{
				+    Error *err = NULL;
				+    int port = qdict_get_int(qdict, "port");
				+
				+    qmp_xen_event_inject(port, &err);
				+    if (!err) {
				+        monitor_printf(mon, "Delivered port %d\n", port);
				+        return;
				+    }
				+
				+    hmp_handle_error(mon, err);
				+}
			
 
				+
			
--- a/hw/i386/kvm/xen_evtchn.h
+++ b/hw/i386/kvm/xen_evtchn.h
@@ -0,0 +1,88 @@
 
				+/*
			
 
				+ * QEMU Xen emulation: Event channel support
			
 
				+ *
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * Authors: David Woodhouse <dwmw2@infradead.org>
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ */
			
 
				+
			
 
				+#ifndef QEMU_XEN_EVTCHN_H
			
 
				+#define QEMU_XEN_EVTCHN_H
			
 
				+
			
 
				+#include "hw/sysbus.h"
			
 
				+
			
 
				+typedef uint32_t evtchn_port_t;
			
 
				+
			
 
				+void xen_evtchn_create(void);
			
 
				+int xen_evtchn_soft_reset(void);
			
 
				+int xen_evtchn_set_callback_param(uint64_t param);
			
 
				+void xen_evtchn_connect_gsis(qemu_irq *system_gsis);
			
 
				+void xen_evtchn_set_callback_level(int level);
			
 
				+
			
 
				+int xen_evtchn_set_port(uint16_t port);
			
 
				+
			
 
				+bool xen_evtchn_set_gsi(int gsi, int level);
			
 
				+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
			
 
				+                          uint64_t addr, uint32_t data, bool is_masked);
			
 
				+void xen_evtchn_remove_pci_device(PCIDevice *dev);
			
 
				+struct kvm_irq_routing_entry;
			
 
				+int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
			
 
				+                                  uint64_t address, uint32_t data);
			
 
				+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data);
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * These functions mirror the libxenevtchn library API, providing the QEMU
			
 
				+ * backend side of "interdomain" event channels.
			
 
				+ */
			
 
				+struct xenevtchn_handle;
			
 
				+struct xenevtchn_handle *xen_be_evtchn_open(void);
			
 
				+int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
			
 
				+                                   evtchn_port_t guest_port);
			
 
				+int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port);
			
 
				+int xen_be_evtchn_close(struct xenevtchn_handle *xc);
			
 
				+int xen_be_evtchn_fd(struct xenevtchn_handle *xc);
			
 
				+int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port);
			
 
				+int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port);
			
 
				+int xen_be_evtchn_pending(struct xenevtchn_handle *xc);
			
 
				+/* Apart from this which is a local addition */
			
 
				+int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc);
			
 
				+
			
 
				+struct evtchn_status;
			
 
				+struct evtchn_close;
			
 
				+struct evtchn_unmask;
			
 
				+struct evtchn_bind_virq;
			
 
				+struct evtchn_bind_pirq;
			
 
				+struct evtchn_bind_ipi;
			
 
				+struct evtchn_send;
			
 
				+struct evtchn_alloc_unbound;
			
 
				+struct evtchn_bind_interdomain;
			
 
				+struct evtchn_bind_vcpu;
			
 
				+struct evtchn_reset;
			
 
				+int xen_evtchn_status_op(struct evtchn_status *status);
			
 
				+int xen_evtchn_close_op(struct evtchn_close *close);
			
 
				+int xen_evtchn_unmask_op(struct evtchn_unmask *unmask);
			
 
				+int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq);
			
 
				+int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq);
			
 
				+int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi);
			
 
				+int xen_evtchn_send_op(struct evtchn_send *send);
			
 
				+int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc);
			
 
				+int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain);
			
 
				+int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu);
			
 
				+int xen_evtchn_reset_op(struct evtchn_reset *reset);
			
 
				+
			
 
				+struct physdev_map_pirq;
			
 
				+struct physdev_unmap_pirq;
			
 
				+struct physdev_eoi;
			
 
				+struct physdev_irq_status_query;
			
 
				+struct physdev_get_free_pirq;
			
 
				+int xen_physdev_map_pirq(struct physdev_map_pirq *map);
			
 
				+int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap);
			
 
				+int xen_physdev_eoi_pirq(struct physdev_eoi *eoi);
			
 
				+int xen_physdev_query_pirq(struct physdev_irq_status_query *query);
			
 
				+int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get);
			
 
				+
			
 
				+#endif /* QEMU_XEN_EVTCHN_H */
			
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -0,0 +1,232 @@
 
				+/*
			
 
				+ * QEMU Xen emulation: Grant table support
			
 
				+ *
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * Authors: David Woodhouse <dwmw2@infradead.org>
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ */
			
 
				+
			
 
				+#include "qemu/osdep.h"
			
 
				+#include "qemu/host-utils.h"
			
 
				+#include "qemu/module.h"
			
 
				+#include "qemu/lockable.h"
			
 
				+#include "qemu/main-loop.h"
			
 
				+#include "qapi/error.h"
			
 
				+#include "qom/object.h"
			
 
				+#include "exec/target_page.h"
			
 
				+#include "exec/address-spaces.h"
			
 
				+#include "migration/vmstate.h"
			
 
				+
			
 
				+#include "hw/sysbus.h"
			
 
				+#include "hw/xen/xen.h"
			
 
				+#include "xen_overlay.h"
			
 
				+#include "xen_gnttab.h"
			
 
				+
			
 
				+#include "sysemu/kvm.h"
			
 
				+#include "sysemu/kvm_xen.h"
			
 
				+
			
 
				+#include "hw/xen/interface/memory.h"
			
 
				+#include "hw/xen/interface/grant_table.h"
			
 
				+
			
 
				+#define TYPE_XEN_GNTTAB "xen-gnttab"
			
 
				+OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
			
 
				+
			
 
				+#define XEN_PAGE_SHIFT 12
			
 
				+#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
			
 
				+
			
 
				+#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
			
 
				+
			
 
				+/* Per-device state of the emulated Xen grant table ("xen-gnttab"). */
				+struct XenGnttabState {
				+    /*< private >*/
				+    SysBusDevice busdev;
				+    /*< public >*/
				+
				+    /* Held by xen_gnttab_map_page() while frame mappings are updated */
				+    QemuMutex gnt_lock;
				+
				+    /* Highest frame index mapped so far, plus one (see xen_gnttab_map_page) */
				+    uint32_t nr_frames;
				+    /* Frame limit, read from kvm_xen_get_gnttab_max_frames() at realize */
				+    uint32_t max_frames;
				+
				+    /* Host pointer to the RAM backing gnt_frames */
				+    union {
				+        grant_entry_v1_t *v1;
				+        /* Theoretically, v2 support could be added here. */
				+    } entries;
				+
				+    /* RAM region holding all max_frames grant frames */
				+    MemoryRegion gnt_frames;
				+    /* One page-sized alias into gnt_frames per frame (max_frames entries) */
				+    MemoryRegion *gnt_aliases;
				+    /* Guest physical address of each frame, or INVALID_GPA when unmapped */
				+    uint64_t *gnt_frame_gpas;
				+};
				+
				+/* The single grant table device instance; NULL until it has been created */
				+struct XenGnttabState *xen_gnttab_singleton;
			
 
				+
			
 
				+/*
				+ * Realize the grant table device.
				+ *
				+ * Allocates RAM for max_frames grant frames, zeroes it, and creates one
				+ * page-sized alias region per frame so that each frame can be overlaid
				+ * into guest memory individually. All frames start out unmapped.
				+ *
				+ * Sets @errp and returns early unless Xen emulation mode is active.
				+ */
				+static void xen_gnttab_realize(DeviceState *dev, Error **errp)
				+{
				+    XenGnttabState *s = XEN_GNTTAB(dev);
				+    /* Same type as s->max_frames, so the loop bound compare is unsigned */
				+    uint32_t i;
				+
				+    if (xen_mode != XEN_EMULATE) {
				+        error_setg(errp, "Xen grant table support is for Xen emulation");
				+        return;
				+    }
				+    s->nr_frames = 0;
				+    s->max_frames = kvm_xen_get_gnttab_max_frames();
				+    memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
				+                           XEN_PAGE_SIZE * s->max_frames, &error_abort);
				+    memory_region_set_enabled(&s->gnt_frames, true);
				+    s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);
				+    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
				+
				+    /* Create individual page-sized aliases for overlays */
				+    s->gnt_aliases = g_new0(MemoryRegion, s->max_frames);
				+    s->gnt_frame_gpas = g_new(uint64_t, s->max_frames);
				+    for (i = 0; i < s->max_frames; i++) {
				+        memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
				+                                 NULL, &s->gnt_frames,
				+                                 i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
				+        s->gnt_frame_gpas[i] = INVALID_GPA;
				+    }
				+
				+    qemu_mutex_init(&s->gnt_lock);
				+
				+    xen_gnttab_singleton = s;
				+}
			
 
				+
			
 
				+/*
				+ * Migration post-load hook: re-establish the guest mapping of every grant
				+ * frame whose saved address is not INVALID_GPA. Always returns 0.
				+ */
				+static int xen_gnttab_post_load(void *opaque, int version_id)
				+{
				+    XenGnttabState *gt = XEN_GNTTAB(opaque);
				+    uint32_t frame;
				+
				+    for (frame = 0; frame < gt->nr_frames; frame++) {
				+        uint64_t gpa = gt->gnt_frame_gpas[frame];
				+
				+        if (gpa == INVALID_GPA) {
				+            continue;
				+        }
				+        xen_overlay_do_map_page(&gt->gnt_aliases[frame], gpa);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/* VMState .needed hook: true only when running in Xen emulation mode. */
				+static bool xen_gnttab_is_needed(void *opaque)
				+{
				+    return xen_mode == XEN_EMULATE;
				+}
			
 
				+
			
 
				+/*
				+ * Migration description for the grant table device. It carries nr_frames
				+ * and then a variable-length array of frame addresses whose length is
				+ * given by the nr_frames field. Mappings are rebuilt from those
				+ * addresses in xen_gnttab_post_load().
				+ */
				+static const VMStateDescription xen_gnttab_vmstate = {
				+    .name = "xen_gnttab",
				+    .version_id = 1,
				+    .minimum_version_id = 1,
				+    .needed = xen_gnttab_is_needed,
				+    .post_load = xen_gnttab_post_load,
				+    .fields = (VMStateField[]) {
				+        VMSTATE_UINT32(nr_frames, XenGnttabState),
				+        /* NOTE(review): nr_frames from the stream is not checked against max_frames here — confirm */
				+        VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
				+                              vmstate_info_uint64, uint64_t),
				+        VMSTATE_END_OF_LIST()
				+    }
				+};
			
 
				+
			
 
				+/* Class initializer: install the migration description and realize hook. */
				+static void xen_gnttab_class_init(ObjectClass *klass, void *data)
				+{
				+    DeviceClass *device_class = DEVICE_CLASS(klass);
				+
				+    device_class->vmsd = &xen_gnttab_vmstate;
				+    device_class->realize = xen_gnttab_realize;
				+}
			
 
				+
			
 
				+/* QOM type description: "xen-gnttab" is a sysbus device. */
				+static const TypeInfo xen_gnttab_info = {
				+    .name          = TYPE_XEN_GNTTAB,
				+    .parent        = TYPE_SYS_BUS_DEVICE,
				+    .instance_size = sizeof(XenGnttabState),
				+    .class_init    = xen_gnttab_class_init,
				+};
			
 
				+
			
 
				+/* Create the grant table sysbus device and record it as the singleton. */
				+void xen_gnttab_create(void)
				+{
				+    DeviceState *dev = sysbus_create_simple(TYPE_XEN_GNTTAB, -1, NULL);
				+
				+    xen_gnttab_singleton = XEN_GNTTAB(dev);
				+}
			
 
				+
			
 
				+/* Register the "xen-gnttab" type with QOM. */
				+static void xen_gnttab_register_types(void)
				+{
				+    type_register_static(&xen_gnttab_info);
				+}
				+
				+type_init(xen_gnttab_register_types)
			
 
				+
			
 
				+/*
				+ * Map grant table frame @idx at guest frame number @gfn.
				+ *
				+ * Returns 0 on success, -ENOTSUP when no grant table device exists and
				+ * -EINVAL when @idx is not below max_frames.
				+ */
				+int xen_gnttab_map_page(uint64_t idx, uint64_t gfn)
				+{
				+    XenGnttabState *gt = xen_gnttab_singleton;
				+    uint64_t gpa;
				+
				+    if (!gt) {
				+        return -ENOTSUP;
				+    }
				+    if (idx >= gt->max_frames) {
				+        return -EINVAL;
				+    }
				+
				+    gpa = gfn << XEN_PAGE_SHIFT;
				+
				+    QEMU_IOTHREAD_LOCK_GUARD();
				+    QEMU_LOCK_GUARD(&gt->gnt_lock);
				+
				+    xen_overlay_do_map_page(&gt->gnt_aliases[idx], gpa);
				+    gt->gnt_frame_gpas[idx] = gpa;
				+
				+    /* Keep nr_frames covering the highest index mapped so far */
				+    if (idx >= gt->nr_frames) {
				+        gt->nr_frames = idx + 1;
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Handle a grant table set_version request. Only version 1 is accepted:
				+ * version 2 yields -ENOSYS and anything else -EINVAL. In every case
				+ * set->version is overwritten with 1 before returning.
				+ */
				+int xen_gnttab_set_version_op(struct gnttab_set_version *set)
				+{
				+    int rc;
				+
				+    if (set->version == 1) {
				+        rc = 0;
				+    } else if (set->version == 2) {
				+        /* Behave as before set_version was introduced. */
				+        rc = -ENOSYS;
				+    } else {
				+        rc = -EINVAL;
				+    }
				+
				+    set->version = 1;
				+    return rc;
				+}
			
 
				+
			
 
				+/*
				+ * Handle a grant table get_version request: report version 1 for
				+ * DOMID_SELF or our own domain id, -ESRCH for any other domain.
				+ */
				+int xen_gnttab_get_version_op(struct gnttab_get_version *get)
				+{
				+    bool own_domain = (get->dom == DOMID_SELF || get->dom == xen_domid);
				+
				+    if (!own_domain) {
				+        return -ESRCH;
				+    }
				+
				+    get->version = 1;
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Handle a grant table query_size request.
				+ *
				+ * Returns -ENOTSUP when no grant table device exists, otherwise 0 with
				+ * the outcome in size->status: GNTST_bad_domain for a foreign domain,
				+ * or GNTST_okay together with the current and maximum frame counts.
				+ */
				+int xen_gnttab_query_size_op(struct gnttab_query_size *size)
				+{
				+    XenGnttabState *gt = xen_gnttab_singleton;
				+    bool own_domain;
				+
				+    if (!gt) {
				+        return -ENOTSUP;
				+    }
				+
				+    own_domain = (size->dom == DOMID_SELF || size->dom == xen_domid);
				+    if (own_domain) {
				+        size->status = GNTST_okay;
				+        size->nr_frames = gt->nr_frames;
				+        size->max_nr_frames = gt->max_frames;
				+    } else {
				+        size->status = GNTST_bad_domain;
				+    }
				+
				+    return 0;
				+}
			
--- a/hw/i386/kvm/xen_gnttab.h
+++ b/hw/i386/kvm/xen_gnttab.h
@@ -0,0 +1,25 @@
 
				+/*
			
 
				+ * QEMU Xen emulation: Grant table support
			
 
				+ *
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * Authors: David Woodhouse <dwmw2@infradead.org>
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ */
			
 
				+
			
 
				+#ifndef QEMU_XEN_GNTTAB_H
			
 
				+#define QEMU_XEN_GNTTAB_H
			
 
				+
			
 
				+void xen_gnttab_create(void);
			
 
				+int xen_gnttab_map_page(uint64_t idx, uint64_t gfn);
			
 
				+
			
 
				+struct gnttab_set_version;
			
 
				+struct gnttab_get_version;
			
 
				+struct gnttab_query_size;
			
 
				+int xen_gnttab_set_version_op(struct gnttab_set_version *set);
			
 
				+int xen_gnttab_get_version_op(struct gnttab_get_version *get);
			
 
				+int xen_gnttab_query_size_op(struct gnttab_query_size *size);
			
 
				+
			
 
				+#endif /* QEMU_XEN_GNTTAB_H */
			
--- a/hw/i386/kvm/xen_overlay.c
+++ b/hw/i386/kvm/xen_overlay.c
@@ -0,0 +1,272 @@
 
				+/*
			
 
				+ * QEMU Xen emulation: Shared/overlay pages support
			
 
				+ *
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * Authors: David Woodhouse <dwmw2@infradead.org>
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ */
			
 
				+
			
 
				+#include "qemu/osdep.h"
			
 
				+#include "qemu/host-utils.h"
			
 
				+#include "qemu/module.h"
			
 
				+#include "qemu/main-loop.h"
			
 
				+#include "qapi/error.h"
			
 
				+#include "qom/object.h"
			
 
				+#include "exec/target_page.h"
			
 
				+#include "exec/address-spaces.h"
			
 
				+#include "migration/vmstate.h"
			
 
				+
			
 
				+#include "hw/sysbus.h"
			
 
				+#include "hw/xen/xen.h"
			
 
				+#include "xen_overlay.h"
			
 
				+
			
 
				+#include "sysemu/kvm.h"
			
 
				+#include "sysemu/kvm_xen.h"
			
 
				+#include <linux/kvm.h>
			
 
				+
			
 
				+#include "hw/xen/interface/memory.h"
			
 
				+
			
 
				+
			
 
				+#define TYPE_XEN_OVERLAY "xen-overlay"
			
 
				+OBJECT_DECLARE_SIMPLE_TYPE(XenOverlayState, XEN_OVERLAY)
			
 
				+
			
 
				+#define XEN_PAGE_SHIFT 12
			
 
				+#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
			
 
				+
			
 
				+/* Per-device state of the Xen shared/overlay page device ("xen-overlay"). */
				+struct XenOverlayState {
				+    /*< private >*/
				+    SysBusDevice busdev;
				+    /*< public >*/
				+
				+    /* One-page RAM region backing the shared_info page */
				+    MemoryRegion shinfo_mem;
				+    /* Host pointer to the RAM of shinfo_mem */
				+    void *shinfo_ptr;
				+    /* Guest physical address of the shared_info page, or INVALID_GPA */
				+    uint64_t shinfo_gpa;
				+    /* Cached copy of the kernel's long_mode attribute */
				+    bool long_mode;
				+};
				+
				+/* The single overlay device instance; NULL until it has been created */
				+struct XenOverlayState *xen_overlay_singleton;
			
 
				+
			
 
				+/*
				+ * Map, move or unmap an overlay @page in system memory.
				+ *
				+ * @gpa is the new guest physical address; INVALID_GPA requests removal.
				+ *
				+ * Xen lets a guest map the same page into as many guest physical frames
				+ * as it likes. We keep a single mapping per page instead, because
				+ * tracking and restoring several would be hard, and one mapping has
				+ * proved sufficient for all known guests.
				+ */
				+void xen_overlay_do_map_page(MemoryRegion *page, uint64_t gpa)
				+{
				+    bool want_mapped = (gpa != INVALID_GPA);
				+
				+    if (!memory_region_is_mapped(page)) {
				+        if (want_mapped) {
				+            memory_region_add_subregion_overlap(get_system_memory(), gpa,
				+                                                page, 0);
				+        }
				+        return;
				+    }
				+
				+    if (want_mapped) {
				+        /* Already mapped somewhere: just move it */
				+        memory_region_set_address(page, gpa);
				+    } else {
				+        memory_region_del_subregion(get_system_memory(), page);
				+    }
				+}
			
 
				+
			
 
				+/*
				+ * Tell the back end which guest frame holds the shared_info page.
				+ * KVM is the only existing back end for now, so the ioctl is issued
				+ * directly. Returns the result of the KVM_XEN_HVM_SET_ATTR ioctl.
				+ */
				+static int xen_overlay_set_be_shinfo(uint64_t gfn)
				+{
				+    struct kvm_xen_hvm_attr attr = {
				+        .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
				+    };
				+
				+    attr.u.shared_info.gfn = gfn;
				+
				+    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &attr);
				+}
			
 
				+
			
 
				+
			
 
				+/*
				+ * Realize the overlay device: allocate and zero the one-page RAM region
				+ * for shared_info and mark it as not yet mapped into the guest.
				+ *
				+ * Sets @errp and returns early unless Xen emulation mode is active.
				+ */
				+static void xen_overlay_realize(DeviceState *dev, Error **errp)
				+{
				+    XenOverlayState *ov = XEN_OVERLAY(dev);
				+
				+    if (xen_mode != XEN_EMULATE) {
				+        error_setg(errp, "Xen overlay page support is for Xen emulation");
				+        return;
				+    }
				+
				+    memory_region_init_ram(&ov->shinfo_mem, OBJECT(dev), "xen:shared_info",
				+                           XEN_PAGE_SIZE, &error_abort);
				+    memory_region_set_enabled(&ov->shinfo_mem, true);
				+
				+    ov->shinfo_ptr = memory_region_get_ram_ptr(&ov->shinfo_mem);
				+    memset(ov->shinfo_ptr, 0, XEN_PAGE_SIZE);
				+
				+    ov->long_mode = false;
				+    ov->shinfo_gpa = INVALID_GPA;
				+}
			
 
				+
			
 
				+/*
				+ * Migration pre-save hook: refresh the cached long_mode flag from the
				+ * kernel. Returns the result of xen_sync_long_mode().
				+ */
				+static int xen_overlay_pre_save(void *opaque)
				+{
				+    /*
				+     * Fetch the kernel's idea of long_mode to avoid the race condition
				+     * where the guest has set the hypercall page up in 64-bit mode but
				+     * not yet made a hypercall by the time migration happens, so qemu
				+     * hasn't yet noticed.
				+     */
				+    return xen_sync_long_mode();
				+}
			
 
				+
			
 
				+/*
				+ * Migration post-load hook: if a shared_info address was saved, map the
				+ * page there and point the back end at it; if long mode was saved, push
				+ * it to the kernel. Results of those calls are not checked; always
				+ * returns 0.
				+ */
				+static int xen_overlay_post_load(void *opaque, int version_id)
				+{
				+    XenOverlayState *ov = opaque;
				+    uint64_t gpa = ov->shinfo_gpa;
				+
				+    if (gpa != INVALID_GPA) {
				+        xen_overlay_do_map_page(&ov->shinfo_mem, gpa);
				+        xen_overlay_set_be_shinfo(gpa >> XEN_PAGE_SHIFT);
				+    }
				+
				+    if (ov->long_mode) {
				+        xen_set_long_mode(true);
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/* VMState .needed hook: true only when running in Xen emulation mode. */
				+static bool xen_overlay_is_needed(void *opaque)
				+{
				+    return xen_mode == XEN_EMULATE;
				+}
			
 
				+
			
 
				+/*
				+ * Migration description for the overlay device: the shared_info guest
				+ * address and the long_mode flag. long_mode is refreshed in pre_save and
				+ * both are re-applied in post_load.
				+ */
				+static const VMStateDescription xen_overlay_vmstate = {
				+    .name = "xen_overlay",
				+    .version_id = 1,
				+    .minimum_version_id = 1,
				+    .needed = xen_overlay_is_needed,
				+    .pre_save = xen_overlay_pre_save,
				+    .post_load = xen_overlay_post_load,
				+    .fields = (VMStateField[]) {
				+        VMSTATE_UINT64(shinfo_gpa, XenOverlayState),
				+        VMSTATE_BOOL(long_mode, XenOverlayState),
				+        VMSTATE_END_OF_LIST()
				+    }
				+};
			
 
				+
			
 
				+/* Device reset handler: delegates to kvm_xen_soft_reset(); @dev is unused. */
				+static void xen_overlay_reset(DeviceState *dev)
				+{
				+    kvm_xen_soft_reset();
				+}
			
 
				+
			
 
				+/* Class initializer: install the realize, reset and migration hooks. */
				+static void xen_overlay_class_init(ObjectClass *klass, void *data)
				+{
				+    DeviceClass *device_class = DEVICE_CLASS(klass);
				+
				+    device_class->realize = xen_overlay_realize;
				+    device_class->reset = xen_overlay_reset;
				+    device_class->vmsd = &xen_overlay_vmstate;
				+}
			
 
				+
			
 
				+/* QOM type description: "xen-overlay" is a sysbus device. */
				+static const TypeInfo xen_overlay_info = {
				+    .name          = TYPE_XEN_OVERLAY,
				+    .parent        = TYPE_SYS_BUS_DEVICE,
				+    .instance_size = sizeof(XenOverlayState),
				+    .class_init    = xen_overlay_class_init,
				+};
			
 
				+
			
 
				+/*
				+ * Create the overlay sysbus device and record it as the singleton.
				+ * Also replaces a xen_domid still equal to DOMID_QEMU with 1.
				+ */
				+void xen_overlay_create(void)
				+{
				+    xen_overlay_singleton = XEN_OVERLAY(sysbus_create_simple(TYPE_XEN_OVERLAY,
				+                                                             -1, NULL));
				+
				+    /* If xen_domid wasn't explicitly set, at least make sure it isn't zero. */
				+    if (xen_domid == DOMID_QEMU) {
				+        xen_domid = 1;
				+    }
				+}
			
 
				+
			
 
				+/* Register the "xen-overlay" type with QOM. */
				+static void xen_overlay_register_types(void)
				+{
				+    type_register_static(&xen_overlay_info);
				+}
				+
				+type_init(xen_overlay_register_types)
			
 
				+
			
 
				+/*
				+ * Map the shared_info page at @gpa, or unmap it when @gpa is INVALID_GPA.
				+ *
				+ * Must be called with the iothread lock held (asserted below).
				+ *
				+ * Returns 0 on success, -ENOENT when no overlay device exists, or the
				+ * error from the back end when updating the kernel's shared_info
				+ * location fails.
				+ */
				+int xen_overlay_map_shinfo_page(uint64_t gpa)
				+{
				+    XenOverlayState *s = xen_overlay_singleton;
				+    int ret;
				+
				+    if (!s) {
				+        return -ENOENT;
				+    }
				+
				+    assert(qemu_mutex_iothread_locked());
				+
				+    /*
				+     * "Not mapped" is INVALID_GPA, not 0 (see realize and post_load).
				+     * Testing against 0 ran the teardown when nothing was mapped and
				+     * skipped it for a page mapped at GPA 0.
				+     */
				+    if (s->shinfo_gpa != INVALID_GPA) {
				+        /* Moving or removing the shinfo page: turn the kernel magic off first */
				+        ret = xen_overlay_set_be_shinfo(INVALID_GFN);
				+        if (ret) {
				+            return ret;
				+        }
				+    }
				+
				+    xen_overlay_do_map_page(&s->shinfo_mem, gpa);
				+    if (gpa != INVALID_GPA) {
				+        ret = xen_overlay_set_be_shinfo(gpa >> XEN_PAGE_SHIFT);
				+        if (ret) {
				+            return ret;
				+        }
				+    }
				+    s->shinfo_gpa = gpa;
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Return the host pointer to the shared_info page, or NULL when no
				+ * overlay device exists.
				+ */
				+void *xen_overlay_get_shinfo_ptr(void)
				+{
				+    XenOverlayState *ov = xen_overlay_singleton;
				+
				+    return ov ? ov->shinfo_ptr : NULL;
				+}
			
 
				+
			
 
				+/*
				+ * Read the long_mode attribute from the kernel into the overlay state.
				+ *
				+ * Returns 0 on success, -ENOENT when no overlay device exists, or the
				+ * error from the KVM_XEN_HVM_GET_ATTR ioctl (cached value untouched).
				+ */
				+int xen_sync_long_mode(void)
				+{
				+    struct kvm_xen_hvm_attr attr = {
				+        .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
				+    };
				+    int rc;
				+
				+    if (!xen_overlay_singleton) {
				+        return -ENOENT;
				+    }
				+
				+    rc = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_GET_ATTR, &attr);
				+    if (rc) {
				+        return rc;
				+    }
				+
				+    xen_overlay_singleton->long_mode = attr.u.long_mode;
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Set the kernel's long_mode attribute to @long_mode and, on success,
				+ * mirror it in the overlay state.
				+ *
				+ * Returns 0 on success, -ENOENT when no overlay device exists, or the
				+ * error from the KVM_XEN_HVM_SET_ATTR ioctl (cached value untouched).
				+ */
				+int xen_set_long_mode(bool long_mode)
				+{
				+    struct kvm_xen_hvm_attr attr = {
				+        .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
				+        .u.long_mode = long_mode,
				+    };
				+    int rc;
				+
				+    if (!xen_overlay_singleton) {
				+        return -ENOENT;
				+    }
				+
				+    rc = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &attr);
				+    if (rc) {
				+        return rc;
				+    }
				+
				+    xen_overlay_singleton->long_mode = attr.u.long_mode;
				+    return 0;
				+}
			
 
				+
			
 
				+/* Return the cached long_mode flag; false when no overlay device exists. */
				+bool xen_is_long_mode(void)
				+{
				+    if (!xen_overlay_singleton) {
				+        return false;
				+    }
				+
				+    return xen_overlay_singleton->long_mode;
				+}
			
--- a/hw/i386/kvm/xen_overlay.h
+++ b/hw/i386/kvm/xen_overlay.h
@@ -0,0 +1,26 @@
 
				+/*
			
 
				+ * QEMU Xen emulation: Shared/overlay pages support
			
 
				+ *
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * Authors: David Woodhouse <dwmw2@infradead.org>
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ */
			
 
				+
			
 
				+#ifndef QEMU_XEN_OVERLAY_H
			
 
				+#define QEMU_XEN_OVERLAY_H
			
 
				+
			
 
				+void xen_overlay_create(void);
			
 
				+
			
 
				+int xen_overlay_map_shinfo_page(uint64_t gpa);
			
 
				+void *xen_overlay_get_shinfo_ptr(void);
			
 
				+
			
 
				+int xen_sync_long_mode(void);
			
 
				+int xen_set_long_mode(bool long_mode);
			
 
				+bool xen_is_long_mode(void);
			
 
				+
			
 
				+void xen_overlay_do_map_page(MemoryRegion *page, uint64_t gpa);
			
 
				+
			
 
				+#endif /* QEMU_XEN_OVERLAY_H */
			
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -0,0 +1,500 @@
 
				+/*
			
 
				+ * QEMU Xen emulation: XenStore emulation
			
 
				+ *
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * Authors: David Woodhouse <dwmw2@infradead.org>
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ */
			
 
				+
			
 
				+#include "qemu/osdep.h"
			
 
				+
			
 
				+#include "qemu/host-utils.h"
			
 
				+#include "qemu/module.h"
			
 
				+#include "qemu/main-loop.h"
			
 
				+#include "qemu/cutils.h"
			
 
				+#include "qapi/error.h"
			
 
				+#include "qom/object.h"
			
 
				+#include "migration/vmstate.h"
			
 
				+
			
 
				+#include "hw/sysbus.h"
			
 
				+#include "hw/xen/xen.h"
			
 
				+#include "xen_overlay.h"
			
 
				+#include "xen_evtchn.h"
			
 
				+#include "xen_xenstore.h"
			
 
				+
			
 
				+#include "sysemu/kvm.h"
			
 
				+#include "sysemu/kvm_xen.h"
			
 
				+
			
 
				+#include "hw/xen/interface/io/xs_wire.h"
			
 
				+#include "hw/xen/interface/event_channel.h"
			
 
				+
			
 
				+#define TYPE_XEN_XENSTORE "xen-xenstore"
			
 
				+OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
			
 
				+
			
 
				+#define XEN_PAGE_SHIFT 12
			
 
				+#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
			
 
				+
			
 
				+#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
			
 
				+#define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
			
 
				+
			
 
				+#define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
			
 
				+
			
 
				+/* Per-device state of the emulated XenStore ("xen-xenstore"). */
				+struct XenXenstoreState {
				+    /*< private >*/
				+    SysBusDevice busdev;
				+    /*< public >*/
				+
				+    /* One-page RAM region for the XenStore interface page */
				+    MemoryRegion xenstore_page;
				+    /* Host pointer to the RAM of xenstore_page */
				+    struct xenstore_domain_interface *xs;
				+    /* Request and response buffers: one message header plus maximum payload */
				+    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
				+    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
				+    /* A request is complete when req_offset equals header size + req->len */
				+    uint32_t req_offset;
				+    uint32_t rsp_offset;
				+    /* Set by process_req() once a response has been built in rsp_data */
				+    bool rsp_pending;
				+    bool fatal_error;
				+
				+    /* Guest-side port; refreshed from the event channel handle in pre_save */
				+    evtchn_port_t guest_port;
				+    /* Back-end port returned by xen_be_evtchn_bind_interdomain() in post_load */
				+    evtchn_port_t be_port;
				+    /* Event channel handle opened in realize */
				+    struct xenevtchn_handle *eh;
				+};
				+
				+/* The single XenStore device instance; NULL until it has been realized */
				+struct XenXenstoreState *xen_xenstore_singleton;
				+
				+/* Handler registered on the event channel fd in xen_xenstore_realize() */
				+static void xen_xenstore_event(void *opaque);
			
 
				+
			
 
				+/*
				+ * Realize the XenStore device: allocate and zero the interface page,
				+ * publish the singleton, open a back-end event channel handle and
				+ * register xen_xenstore_event() on its file descriptor.
				+ *
				+ * Sets @errp and returns early unless Xen emulation mode is active, or
				+ * when the event channel handle cannot be opened.
				+ */
				+static void xen_xenstore_realize(DeviceState *dev, Error **errp)
				+{
				+    XenXenstoreState *xss = XEN_XENSTORE(dev);
				+
				+    if (xen_mode != XEN_EMULATE) {
				+        error_setg(errp, "Xen xenstore support is for Xen emulation");
				+        return;
				+    }
				+
				+    memory_region_init_ram(&xss->xenstore_page, OBJECT(dev),
				+                           "xen:xenstore_page", XEN_PAGE_SIZE, &error_abort);
				+    memory_region_set_enabled(&xss->xenstore_page, true);
				+    xss->xs = memory_region_get_ram_ptr(&xss->xenstore_page);
				+    memset(xss->xs, 0, XEN_PAGE_SIZE);
				+
				+    /* We can't map it this early as KVM isn't ready */
				+    xen_xenstore_singleton = xss;
				+
				+    xss->eh = xen_be_evtchn_open();
				+    if (xss->eh == NULL) {
				+        error_setg(errp, "Xenstore evtchn port init failed");
				+        return;
				+    }
				+
				+    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(xss->eh),
				+                       true, xen_xenstore_event, NULL, NULL, NULL, xss);
				+}
			
 
				+
			
 
				+/* VMState .needed hook: true only when running in Xen emulation mode. */
				+static bool xen_xenstore_is_needed(void *opaque)
				+{
				+    return xen_mode == XEN_EMULATE;
				+}
			
 
				+
			
 
				+/*
				+ * Migration pre-save hook: when an event channel handle exists, record
				+ * its guest port so it is included in the stream. Always returns 0.
				+ */
				+static int xen_xenstore_pre_save(void *opaque)
				+{
				+    XenXenstoreState *xss = opaque;
				+
				+    if (!xss->eh) {
				+        return 0;
				+    }
				+
				+    xss->guest_port = xen_be_evtchn_get_guest_port(xss->eh);
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Migration post-load hook: rebind the back-end event channel to the
				+ * guest port carried in the stream, if there is one.
				+ *
				+ * Returns 0 on success or when no guest port was saved, otherwise the
				+ * negative value from xen_be_evtchn_bind_interdomain().
				+ */
				+static int xen_xenstore_post_load(void *opaque, int ver)
				+{
				+    XenXenstoreState *xss = opaque;
				+    int bound;
				+
				+    /*
				+     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
				+     * unbind the XenStore evtchn and rebind to it, having obtained the
				+     * "remote" port through EVTCHNOP_status. In the case that migration
				+     * occurs while it's unbound, the "remote" port needs to be the same
				+     * as before so that the guest can find it, but should remain unbound.
				+     */
				+    if (!xss->guest_port) {
				+        return 0;
				+    }
				+
				+    bound = xen_be_evtchn_bind_interdomain(xss->eh, xen_domid,
				+                                           xss->guest_port);
				+    if (bound < 0) {
				+        return bound;
				+    }
				+
				+    xss->be_port = bound;
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Migration description for the XenStore device: both message buffers
				+ * in full, their offsets, the pending/fatal flags and the guest port.
				+ * The guest port is refreshed in pre_save and rebound in post_load.
				+ */
				+static const VMStateDescription xen_xenstore_vmstate = {
				+    .name = "xen_xenstore",
				+    .version_id = 1,
				+    .minimum_version_id = 1,
				+    .needed = xen_xenstore_is_needed,
				+    .pre_save = xen_xenstore_pre_save,
				+    .post_load = xen_xenstore_post_load,
				+    .fields = (VMStateField[]) {
				+        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
				+                            sizeof_field(XenXenstoreState, req_data)),
				+        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
				+                            sizeof_field(XenXenstoreState, rsp_data)),
				+        VMSTATE_UINT32(req_offset, XenXenstoreState),
				+        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
				+        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
				+        VMSTATE_UINT32(guest_port, XenXenstoreState),
				+        VMSTATE_BOOL(fatal_error, XenXenstoreState),
				+        VMSTATE_END_OF_LIST()
				+    }
				+};
			
 
				+
			
 
				+/* Class initializer: install the migration description and realize hook. */
				+static void xen_xenstore_class_init(ObjectClass *klass, void *data)
				+{
				+    DeviceClass *device_class = DEVICE_CLASS(klass);
				+
				+    device_class->vmsd = &xen_xenstore_vmstate;
				+    device_class->realize = xen_xenstore_realize;
				+}
			
 
				+
			
 
				+/* QOM type description: "xen-xenstore" is a sysbus device. */
				+static const TypeInfo xen_xenstore_info = {
				+    .name          = TYPE_XEN_XENSTORE,
				+    .parent        = TYPE_SYS_BUS_DEVICE,
				+    .instance_size = sizeof(XenXenstoreState),
				+    .class_init    = xen_xenstore_class_init,
				+};
			
 
				+
			
 
				+/* Create the XenStore sysbus device and record it as the singleton. */
				+void xen_xenstore_create(void)
				+{
				+    xen_xenstore_singleton =
				+        XEN_XENSTORE(sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL));
				+
				+    /*
				+     * Defer the init (xen_xenstore_reset()) until KVM is set up and the
				+     * overlay page can be mapped.
				+     */
				+}
			
 
				+
			
 
				+/*
				+ * Registers xen_xenstore_info as a static type; this function is the
				+ * one handed to type_init() just below.
				+ */
				+static void xen_xenstore_register_types(void)
				+{
				+    type_register_static(&xen_xenstore_info);
				+}
				+
				+type_init(xen_xenstore_register_types)
			
 
				+
			
 
				+/*
				+ * Return the guest_port recorded in the XenStore singleton, or 0 when
				+ * no XenStore device has been created.
				+ */
				+uint16_t xen_xenstore_get_port(void)
				+{
				+    XenXenstoreState *xs_dev = xen_xenstore_singleton;
				+
				+    return xs_dev ? xs_dev->guest_port : 0;
				+}
			
 
				+
			
 
				+/*
				+ * Report whether a complete request is sitting in s->req_data, i.e.
				+ * whether req_offset equals the header size plus the payload length
				+ * stored in the header.
				+ */
				+static bool req_pending(XenXenstoreState *s)
				+{
				+    const struct xsd_sockmsg *hdr = (struct xsd_sockmsg *)s->req_data;
				+
				+    return XENSTORE_HEADER_SIZE + hdr->len == s->req_offset;
				+}
			
 
				+
			
 
				+/* Discard any buffered request: rewind the offset and zero the buffer. */
				+static void reset_req(XenXenstoreState *s)
				+{
				+    s->req_offset = 0;
				+    memset(s->req_data, 0, sizeof(s->req_data));
				+}
			
 
				+
			
 
				+/*
				+ * Discard the buffered response: zero the buffer, rewind the offset and
				+ * clear the response-pending flag.
				+ */
				+static void reset_rsp(XenXenstoreState *s)
				+{
				+    memset(s->rsp_data, 0, sizeof(s->rsp_data));
				+    s->rsp_offset = 0;
				+    s->rsp_pending = false;
				+}
			
 
				+
			
 
				+/*
				+ * Turn the complete request in s->req_data into a response in
				+ * s->rsp_data.
				+ *
				+ * Every request is answered the same way: an XS_ERROR message that
				+ * echoes the request's req_id and tx_id and carries the NUL-terminated
				+ * string "ENOSYS" as its payload. Afterwards the request buffer is
				+ * reset and the response is marked pending.
				+ *
				+ * Must only be called with a complete request buffered and no response
				+ * outstanding; both conditions are asserted.
				+ */
				+static void process_req(XenXenstoreState *s)
				+{
				+    const char enosys[] = "ENOSYS";
				+    struct xsd_sockmsg *request = (struct xsd_sockmsg *)s->req_data;
				+    struct xsd_sockmsg *reply = (struct xsd_sockmsg *)s->rsp_data;
				+
				+    assert(req_pending(s));
				+    assert(!s->rsp_pending);
				+
				+    reply->type = XS_ERROR;
				+    reply->len = sizeof(enosys);
				+    reply->tx_id = request->tx_id;
				+    reply->req_id = request->req_id;
				+
				+    /* The payload sits immediately after the header. */
				+    memcpy(reply + 1, enosys, sizeof(enosys));
				+
				+    reset_req(s);
				+    s->rsp_pending = true;
				+}
			
 
				+
			
 
				+/*
				+ * Copy up to @len bytes from the request ring into @ptr.
				+ *
				+ * Copies in chunks so that no single memcpy() runs past the end of the
				+ * ring buffer, stops early when the ring is empty, and publishes the
				+ * advanced consumer index (req_cons) on the way out. If the producer
				+ * and consumer indices are further apart than the ring size, an error
				+ * is reported and s->fatal_error is set.
				+ *
				+ * Returns the number of bytes actually copied (0 if @len is 0).
				+ */
				+static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
				+                                   unsigned int len)
				+{
				+    XENSTORE_RING_IDX producer, consumer;
				+    unsigned int total = 0;
				+
				+    if (!len) {
				+        return 0;
				+    }
				+
				+    producer = qatomic_read(&s->xs->req_prod);
				+    consumer = qatomic_read(&s->xs->req_cons);
				+
				+    /* Ensure the ring contents don't cross the req_prod access. */
				+    smp_rmb();
				+
				+    while (len) {
				+        unsigned int pending = producer - consumer;
				+        unsigned int start = MASK_XENSTORE_IDX(consumer);
				+        unsigned int chunk;
				+
				+        if (pending > XENSTORE_RING_SIZE) {
				+            error_report("XenStore ring handling error");
				+            s->fatal_error = true;
				+            break;
				+        }
				+        if (pending == 0) {
				+            break;
				+        }
				+
				+        /* Bounded by: data available, caller's request, end of ring. */
				+        chunk = pending;
				+        if (chunk > len) {
				+            chunk = len;
				+        }
				+        if (chunk > XENSTORE_RING_SIZE - start) {
				+            chunk = XENSTORE_RING_SIZE - start;
				+        }
				+
				+        memcpy(ptr, &s->xs->req[start], chunk);
				+
				+        total += chunk;
				+        consumer += chunk;
				+        ptr += chunk;
				+        len -= chunk;
				+    }
				+
				+    /*
				+     * Not sure this ever mattered except on Alpha, but this barrier
				+     * is to ensure that the update to req_cons is globally visible
				+     * only after we have consumed all the data from the ring, and we
				+     * don't end up seeing data written to the ring *after* the other
				+     * end sees the update and writes more to the ring. Xen's own
				+     * xenstored has the same barrier here (although with no comment
				+     * at all, obviously, because it's Xen code).
				+     */
				+    smp_mb();
				+
				+    qatomic_set(&s->xs->req_cons, consumer);
				+
				+    return total;
				+}
			
 
				+
			
 
				+/*
				+ * Copy up to @len bytes from @ptr into the response ring.
				+ *
				+ * Copies in chunks so that no single memcpy() runs past the end of the
				+ * ring buffer, stops early when the ring is full, and publishes the
				+ * advanced producer index (rsp_prod) on the way out. If the computed
				+ * free space exceeds the ring size, an error is reported and
				+ * s->fatal_error is set.
				+ *
				+ * Returns the number of bytes actually copied (0 if @len is 0).
				+ */
				+static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
				+                                 unsigned int len)
				+{
				+    XENSTORE_RING_IDX consumer, producer;
				+    unsigned int total = 0;
				+
				+    if (!len) {
				+        return 0;
				+    }
				+
				+    consumer = qatomic_read(&s->xs->rsp_cons);
				+    producer = qatomic_read(&s->xs->rsp_prod);
				+
				+    /*
				+     * Full barrier between sampling rsp_cons and writing into the
				+     * ring. Presumably it pairs with the barrier the consumer issues
				+     * between reading data out of the ring and updating its consumer
				+     * index (cf. the smp_mb() in copy_from_ring(), or the guest's
				+     * equivalent). It protects against the pathological case (which
				+     * again I think never happened except on Alpha) where our
				+     * subsequent writes to the ring could *cross* the read of
				+     * rsp_cons and the guest could see the new data when it was
				+     * intending to read the old.
				+     */
				+    smp_mb();
				+
				+    while (len) {
				+        unsigned int space = consumer + XENSTORE_RING_SIZE - producer;
				+        unsigned int start = MASK_XENSTORE_IDX(producer);
				+        unsigned int chunk;
				+
				+        if (space > XENSTORE_RING_SIZE) {
				+            error_report("XenStore ring handling error");
				+            s->fatal_error = true;
				+            break;
				+        }
				+        if (space == 0) {
				+            break;
				+        }
				+
				+        /* Bounded by: caller's data, free space, end of ring. */
				+        chunk = len;
				+        if (chunk > space) {
				+            chunk = space;
				+        }
				+        if (chunk > XENSTORE_RING_SIZE - start) {
				+            chunk = XENSTORE_RING_SIZE - start;
				+        }
				+
				+        memcpy(&s->xs->rsp[start], ptr, chunk);
				+
				+        total += chunk;
				+        producer += chunk;
				+        ptr += chunk;
				+        len -= chunk;
				+    }
				+
				+    /* Ensure the ring contents are seen before rsp_prod update. */
				+    smp_wmb();
				+
				+    qatomic_set(&s->xs->rsp_prod, producer);
				+
				+    return total;
				+}
			
 
				+
			
 
				+/*
				+ * Pull as much of the next request as is currently available from the
				+ * ring into s->req_data.
				+ *
				+ * The request is read in two stages: first the fixed-size header, then
				+ * (once the header is complete and its length field has been checked
				+ * against XENSTORE_PAYLOAD_MAX) the payload that the header announces.
				+ * Either stage may come up short if the ring runs dry; s->req_offset
				+ * records the progress so a later call resumes where this one stopped.
				+ *
				+ * Must not be called while a complete request is already buffered
				+ * (asserted).
				+ *
				+ * Returns the number of bytes consumed from the ring by this call.
				+ * Returns 0 if the device is already in the fatal error state, or if
				+ * the header announces an oversized payload (which also sets
				+ * s->fatal_error).
				+ */
				+static unsigned int get_req(XenXenstoreState *s)
				+{
				+    unsigned int copied = 0;
				+
				+    if (s->fatal_error) {
				+        return 0;
				+    }
				+
				+    assert(!req_pending(s));
				+
				+    if (s->req_offset < XENSTORE_HEADER_SIZE) {
				+        void *ptr = s->req_data + s->req_offset;
				+        /*
				+         * Ask only for the part of the header that is still missing.
				+         * Requesting a full XENSTORE_HEADER_SIZE when resuming a
				+         * partially received header would pull following bytes in as
				+         * well, leaving req_offset beyond the header and making the
				+         * payload length computed below wrap around.
				+         */
				+        unsigned int len = XENSTORE_HEADER_SIZE - s->req_offset;
				+        unsigned int copylen = copy_from_ring(s, ptr, len);
				+
				+        copied += copylen;
				+        s->req_offset += copylen;
				+    }
				+
				+    if (s->req_offset >= XENSTORE_HEADER_SIZE) {
				+        struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
				+
				+        /* The header is complete, so its length field can be trusted. */
				+        if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
				+            error_report("Illegal XenStore request");
				+            s->fatal_error = true;
				+            return 0;
				+        }
				+
				+        void *ptr = s->req_data + s->req_offset;
				+        /* Remaining payload; 0 makes copy_from_ring() a no-op. */
				+        unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
				+        unsigned int copylen = copy_from_ring(s, ptr, len);
				+
				+        copied += copylen;
				+        s->req_offset += copylen;
				+    }
				+
				+    return copied;
				+}
			
 
				+
			
 
				+/*
				+ * Push as much of the pending response as fits into the response ring.
				+ *
				+ * s->rsp_offset tracks how much has been sent so far; once the whole
				+ * message (header plus payload) is out, the response buffer is reset.
				+ * Must only be called with a response pending (asserted).
				+ *
				+ * Returns the number of bytes written to the ring by this call, or 0
				+ * if the device is in the fatal error state.
				+ */
				+static unsigned int put_rsp(XenXenstoreState *s)
				+{
				+    struct xsd_sockmsg *rsp;
				+    unsigned int remaining, written;
				+    void *src;
				+
				+    if (s->fatal_error) {
				+        return 0;
				+    }
				+
				+    assert(s->rsp_pending);
				+
				+    rsp = (struct xsd_sockmsg *)s->rsp_data;
				+    assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);
				+
				+    src = s->rsp_data + s->rsp_offset;
				+    remaining = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
				+
				+    written = copy_to_ring(s, src, remaining);
				+    s->rsp_offset += written;
				+
				+    /* Have we produced a complete response? */
				+    if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
				+        reset_rsp(s);
				+    }
				+
				+    return written;
				+}
			
 
				+
			
 
				+/*
				+ * Event channel callback for the XenStore backend port.
				+ *
				+ * Ignores events for any port other than s->be_port. Otherwise keeps
				+ * cycling through "flush pending response", "read more of the request"
				+ * and "process a complete request" until a pass makes no progress at
				+ * all, then notifies the other end if any bytes were moved to or from
				+ * the rings.
				+ */
				+static void xen_xenstore_event(void *opaque)
				+{
				+    XenXenstoreState *s = opaque;
				+    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
				+    bool notify = false;
				+
				+    if (port != s->be_port) {
				+        return;
				+    }
				+
				+    /* We know this is a no-op. */
				+    xen_be_evtchn_unmask(s->eh, port);
				+
				+    for (;;) {
				+        unsigned int sent = 0, received = 0;
				+        bool handled = false;
				+
				+        if (s->rsp_pending) {
				+            sent = put_rsp(s);
				+        }
				+
				+        if (!req_pending(s)) {
				+            received = get_req(s);
				+        }
				+
				+        if (req_pending(s) && !s->rsp_pending) {
				+            process_req(s);
				+            handled = true;
				+        }
				+
				+        if (sent || received) {
				+            notify = true;
				+        }
				+
				+        /* Stop once a whole pass achieved nothing. */
				+        if (!sent && !received && !handled) {
				+            break;
				+        }
				+    }
				+
				+    if (notify) {
				+        xen_be_evtchn_notify(s->eh, s->be_port);
				+    }
				+}
			
 
				+
			
 
				+/*
				+ * Allocate an unbound event channel (dom DOMID_SELF, remote
				+ * DOMID_QEMU) and store its port in s->guest_port. If the allocation
				+ * fails, s->guest_port is left untouched.
				+ */
				+static void alloc_guest_port(XenXenstoreState *s)
				+{
				+    struct evtchn_alloc_unbound alloc = {
				+        .remote_dom = DOMID_QEMU,
				+        .dom = DOMID_SELF,
				+    };
				+
				+    if (xen_evtchn_alloc_unbound_op(&alloc) != 0) {
				+        return;
				+    }
				+
				+    s->guest_port = alloc.port;
				+}
			
 
				+
			
 
				+/*
				+ * (Re)initialise the XenStore device.
				+ *
				+ * Rewinds the request/response offsets, clears the response-pending
				+ * flag, maps the XenStore page at its special PFN if it is not mapped
				+ * yet, allocates the guest-side event channel port and binds the
				+ * backend to it.
				+ *
				+ * Returns 0 on success, -ENOTSUP if no XenStore device exists, or the
				+ * negative value returned by xen_be_evtchn_bind_interdomain().
				+ */
				+int xen_xenstore_reset(void)
				+{
				+    XenXenstoreState *s = xen_xenstore_singleton;
				+    int be_port;
				+
				+    if (!s) {
				+        return -ENOTSUP;
				+    }
				+
				+    s->rsp_pending = false;
				+    s->rsp_offset = 0;
				+    s->req_offset = 0;
				+
				+    if (!memory_region_is_mapped(&s->xenstore_page)) {
				+        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
				+
				+        xen_overlay_do_map_page(&s->xenstore_page, gpa);
				+    }
				+
				+    alloc_guest_port(s);
				+
				+    /*
				+     * As qemu/dom0, bind to the guest's port. For incoming migration, this
				+     * will be unbound as the guest's evtchn table is overwritten. We then
				+     * rebind to the correct guest port in xen_xenstore_post_load().
				+     */
				+    be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
				+    if (be_port < 0) {
				+        return be_port;
				+    }
				+
				+    s->be_port = be_port;
				+    return 0;
				+}
			
--- a/hw/i386/kvm/xen_xenstore.h
+++ b/hw/i386/kvm/xen_xenstore.h
@@ -0,0 +1,20 @@
 
				+/*
			
 
				+ * QEMU Xen emulation: Xenstore emulation
			
 
				+ *
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * Authors: David Woodhouse <dwmw2@infradead.org>
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ */
			
 
				+
			
 
				+#ifndef QEMU_XEN_XENSTORE_H
			
 
				+#define QEMU_XEN_XENSTORE_H
			
 
				+
			
 
				+void xen_xenstore_create(void);
			
 
				+int xen_xenstore_reset(void);
			
 
				+
			
 
				+uint16_t xen_xenstore_get_port(void);
			
 
				+
			
 
				+#endif /* QEMU_XEN_XENSTORE_H */
			
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -90,6 +90,10 @@
 
				 #include "hw/virtio/virtio-iommu.h"
			
 
				 #include "hw/virtio/virtio-pmem-pci.h"
			
 
				 #include "hw/virtio/virtio-mem-pci.h"
			
 
				+#include "hw/i386/kvm/xen_overlay.h"
			
 
				+#include "hw/i386/kvm/xen_evtchn.h"
			
 
				+#include "hw/i386/kvm/xen_gnttab.h"
			
 
				+#include "hw/i386/kvm/xen_xenstore.h"
			
 
				 #include "hw/mem/memory-device.h"
			
 
				 #include "sysemu/replay.h"
			
 
				 #include "target/i386/cpu.h"
			
@@ -1308,6 +1312,15 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 
				     }
			
 
				     *rtc_state = ISA_DEVICE(mc146818_rtc_init(isa_bus, 2000, rtc_irq));
			
 
				 
			
 
				+#ifdef CONFIG_XEN_EMU
			
 
				+    if (xen_mode == XEN_EMULATE) {
			
 
				+        xen_evtchn_connect_gsis(gsi);
			
 
				+        if (pcms->bus) {
			
 
				+            pci_create_simple(pcms->bus, -1, "xen-platform");
			
 
				+        }
			
 
				+    }
			
 
				+#endif
			
 
				+
			
 
				     qemu_register_boot_set(pc_boot_set, *rtc_state);
			
 
				 
			
 
				     if (!xen_enabled() &&
			
@@ -1846,6 +1859,19 @@ static void pc_machine_initfn(Object *obj)
 
				     cxl_machine_init(obj, &pcms->cxl_devices_state);
			
 
				 }
			
 
				 
			
 
				+/*
				+ * kvm_type callback for PC machines.
				+ *
				+ * When built with CONFIG_XEN_EMU and running in XEN_EMULATE mode, this
				+ * creates the Xen overlay, event channel, grant table and XenStore
				+ * devices. Neither argument is examined; the return value is always 0.
				+ */
				+int pc_machine_kvm_type(MachineState *machine, const char *kvm_type)
				+{
				+#ifdef CONFIG_XEN_EMU
				+    if (xen_mode == XEN_EMULATE) {
				+        /* Created in this order: overlay, evtchn, gnttab, xenstore. */
				+        xen_overlay_create();
				+        xen_evtchn_create();
				+        xen_gnttab_create();
				+        xen_xenstore_create();
				+    }
				+#endif
				+
				+    return 0;
				+}
			
 
				+
			
 
				 static void pc_machine_reset(MachineState *machine, ShutdownCause reason)
			
 
				 {
			
 
				     CPUState *cs;
			
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -61,6 +61,11 @@
 
				 #include CONFIG_DEVICES
			
 
				 #include "kvm/kvm_i386.h"
			
 
				 
			
 
				+#ifdef CONFIG_XEN_EMU
			
 
				+#include "hw/xen/xen.h"
			
 
				+#include "hw/i386/kvm/xen_evtchn.h"
			
 
				+#endif
			
 
				+
			
 
				 /* Physical Address of PVH entry point read from kernel ELF NOTE */
			
 
				 static size_t pvh_start_addr;
			
 
				 
			
@@ -610,6 +615,17 @@ void gsi_handler(void *opaque, int n, int level)
 
				         }
			
 
				         /* fall through */
			
 
				     case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1:
			
 
				+#ifdef CONFIG_XEN_EMU
			
 
				+        /*
			
 
				+         * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC
			
 
				+         * routing actually works properly under Xen). And then to
			
 
				+         * *either* the PIRQ handling or the I/OAPIC depending on
			
 
				+         * whether the former wants it.
			
 
				+         */
			
 
				+        if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) {
			
 
				+            break;
			
 
				+        }
			
 
				+#endif
			
 
				         qemu_set_irq(s->ioapic_irq[n], level);
			
 
				         break;
			
 
				     case IO_APIC_SECONDARY_IRQBASE
			
--- a/hw/i386/xen/meson.build
+++ b/hw/i386/xen/meson.build
@@ -2,6 +2,9 @@ i386_ss.add(when: 'CONFIG_XEN', if_true: files(
 
				   'xen-hvm.c',
			
 
				   'xen-mapcache.c',
			
 
				   'xen_apic.c',
			
 
				-  'xen_platform.c',
			
 
				   'xen_pvdevice.c',
			
 
				 ))
			
 
				+
			
 
				+i386_ss.add(when: 'CONFIG_XEN_BUS', if_true: files(
			
 
				+  'xen_platform.c',
			
 
				+))
			
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -1502,13 +1502,7 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
 
				     device_listener_register(&state->device_listener);
			
 
				 
			
 
				     xen_bus_init();
			
 
				-
			
 
				-    /* Initialize backend core & drivers */
			
 
				-    if (xen_be_init() != 0) {
			
 
				-        error_report("xen backend core setup failed");
			
 
				-        goto err;
			
 
				-    }
			
 
				-    xen_be_register_common();
			
 
				+    xen_be_init();
			
 
				 
			
 
				     QLIST_INIT(&xen_physmap);
			
 
				     xen_read_physmap(state);
			
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -27,9 +27,9 @@
 
				 #include "qapi/error.h"
			
 
				 #include "hw/ide/pci.h"
			
 
				 #include "hw/pci/pci.h"
			
 
				-#include "hw/xen/xen_common.h"
			
 
				 #include "migration/vmstate.h"
			
 
				-#include "hw/xen/xen-legacy-backend.h"
			
 
				+#include "hw/xen/xen.h"
			
 
				+#include "net/net.h"
			
 
				 #include "trace.h"
			
 
				 #include "sysemu/xen.h"
			
 
				 #include "sysemu/block-backend.h"
			
@@ -37,6 +37,11 @@
 
				 #include "qemu/module.h"
			
 
				 #include "qom/object.h"
			
 
				 
			
 
				+#ifdef CONFIG_XEN
			
 
				+#include "hw/xen/xen_common.h"
			
 
				+#include "hw/xen/xen-legacy-backend.h"
			
 
				+#endif
			
 
				+
			
 
				 //#define DEBUG_PLATFORM
			
 
				 
			
 
				 #ifdef DEBUG_PLATFORM
			
@@ -108,12 +113,25 @@ static void log_writeb(PCIXenPlatformState *s, char val)
 
				 #define _UNPLUG_NVME_DISKS 3
			
 
				 #define UNPLUG_NVME_DISKS (1u << _UNPLUG_NVME_DISKS)
			
 
				 
			
 
				+/*
				+ * Tell whether @d counts as a passthrough device: any device named
				+ * "xen-pci-passthrough", and additionally any device named "vfio-pci"
				+ * when running in XEN_EMULATE mode.
				+ */
				+static bool pci_device_is_passthrough(PCIDevice *d)
				+{
				+    return strcmp(d->name, "xen-pci-passthrough") == 0 ||
				+           (xen_mode == XEN_EMULATE && strcmp(d->name, "vfio-pci") == 0);
				+}
			
 
				+
			
 
				 static void unplug_nic(PCIBus *b, PCIDevice *d, void *o)
			
 
				 {
			
 
				     /* We have to ignore passthrough devices */
			
 
				     if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
			
 
				             PCI_CLASS_NETWORK_ETHERNET
			
 
				-            && strcmp(d->name, "xen-pci-passthrough") != 0) {
			
 
				+            && !pci_device_is_passthrough(d)) {
			
 
				         object_unparent(OBJECT(d));
			
 
				     }
			
 
				 }
			
@@ -186,9 +204,8 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
 
				         !(flags & UNPLUG_IDE_SCSI_DISKS);
			
 
				 
			
 
				     /* We have to ignore passthrough devices */
			
 
				-    if (!strcmp(d->name, "xen-pci-passthrough")) {
			
 
				+    if (pci_device_is_passthrough(d))
			
 
				         return;
			
 
				-    }
			
 
				 
			
 
				     switch (pci_get_word(d->config + PCI_CLASS_DEVICE)) {
			
 
				     case PCI_CLASS_STORAGE_IDE:
			
@@ -267,18 +284,26 @@ static void platform_fixed_ioport_writeb(void *opaque, uint32_t addr, uint32_t v
 
				     PCIXenPlatformState *s = opaque;
			
 
				 
			
 
				     switch (addr) {
			
 
				-    case 0: /* Platform flags */ {
			
 
				-        hvmmem_type_t mem_type = (val & PFFLAG_ROM_LOCK) ?
			
 
				-            HVMMEM_ram_ro : HVMMEM_ram_rw;
			
 
				-        if (xen_set_mem_type(xen_domid, mem_type, 0xc0, 0x40)) {
			
 
				-            DPRINTF("unable to change ro/rw state of ROM memory area!\n");
			
 
				-        } else {
			
 
				+    case 0: /* Platform flags */
			
 
				+        if (xen_mode == XEN_EMULATE) {
			
 
				+            /* XX: Use i440gx/q35 PAM setup to do this? */
			
 
				             s->flags = val & PFFLAG_ROM_LOCK;
			
 
				-            DPRINTF("changed ro/rw state of ROM memory area. now is %s state.\n",
			
 
				-                    (mem_type == HVMMEM_ram_ro ? "ro":"rw"));
			
 
				+#ifdef CONFIG_XEN
			
 
				+        } else {
			
 
				+            hvmmem_type_t mem_type = (val & PFFLAG_ROM_LOCK) ?
			
 
				+                HVMMEM_ram_ro : HVMMEM_ram_rw;
			
 
				+
			
 
				+            if (xen_set_mem_type(xen_domid, mem_type, 0xc0, 0x40)) {
			
 
				+                DPRINTF("unable to change ro/rw state of ROM memory area!\n");
			
 
				+            } else {
			
 
				+                s->flags = val & PFFLAG_ROM_LOCK;
			
 
				+                DPRINTF("changed ro/rw state of ROM memory area. now is %s state.\n",
			
 
				+                        (mem_type == HVMMEM_ram_ro ? "ro" : "rw"));
			
 
				+            }
			
 
				+#endif
			
 
				         }
			
 
				         break;
			
 
				-    }
			
 
				+
			
 
				     case 2:
			
 
				         log_writeb(s, val);
			
 
				         break;
			
@@ -496,8 +521,8 @@ static void xen_platform_realize(PCIDevice *dev, Error **errp)
 
				     uint8_t *pci_conf;
			
 
				 
			
 
				     /* Device will crash on reset if xen is not initialized */
			
 
				-    if (!xen_enabled()) {
			
 
				-        error_setg(errp, "xen-platform device requires the Xen accelerator");
			
 
				+    if (xen_mode == XEN_DISABLED) {
			
 
				+        error_setg(errp, "xen-platform device requires a Xen guest");
			
 
				         return;
			
 
				     }
			
 
				 
			
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -24,6 +24,8 @@
 
				 #include "qemu/range.h"
			
 
				 #include "qapi/error.h"
			
 
				 
			
 
				+#include "hw/i386/kvm/xen_evtchn.h"
			
 
				+
			
 
				 /* PCI_MSI_ADDRESS_LO */
			
 
				 #define PCI_MSI_ADDRESS_LO_MASK         (~0x3)
			
 
				 
			
@@ -414,6 +416,15 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
 
				     fprintf(stderr, "\n");
			
 
				 #endif
			
 
				 
			
 
				+    if (xen_mode == XEN_EMULATE) {
			
 
				+        for (vector = 0; vector < msi_nr_vectors(flags); vector++) {
			
 
				+            MSIMessage msg = msi_prepare_message(dev, vector);
			
 
				+
			
 
				+            xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data,
			
 
				+                                 msi_is_masked(dev, vector));
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				     if (!(flags & PCI_MSI_FLAGS_ENABLE)) {
			
 
				         return;
			
 
				     }
			
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -26,6 +26,8 @@
 
				 #include "qapi/error.h"
			
 
				 #include "trace.h"
			
 
				 
			
 
				+#include "hw/i386/kvm/xen_evtchn.h"
			
 
				+
			
 
				 /* MSI enable bit and maskall bit are in byte 1 in FLAGS register */
			
 
				 #define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
			
 
				 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
			
@@ -124,6 +126,13 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
 
				 {
			
 
				     bool is_masked = msix_is_masked(dev, vector);
			
 
				 
			
 
				+    if (xen_mode == XEN_EMULATE) {
			
 
				+        MSIMessage msg = msix_prepare_message(dev, vector);
			
 
				+
			
 
				+        xen_evtchn_snoop_msi(dev, true, vector, msg.address, msg.data,
			
 
				+                             is_masked);
			
 
				+    }
			
 
				+
			
 
				     if (is_masked == was_masked) {
			
 
				         return;
			
 
				     }
			
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -49,6 +49,9 @@
 
				 #include "qemu/cutils.h"
			
 
				 #include "pci-internal.h"
			
 
				 
			
 
				+#include "hw/xen/xen.h"
			
 
				+#include "hw/i386/kvm/xen_evtchn.h"
			
 
				+
			
 
				 //#define DEBUG_PCI
			
 
				 #ifdef DEBUG_PCI
			
 
				 # define PCI_DPRINTF(format, ...)       printf(format, ## __VA_ARGS__)
			
@@ -319,6 +322,17 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
 
				 {
			
 
				     MemTxAttrs attrs = {};
			
 
				 
			
 
				+    /*
			
 
				+     * Xen uses the high bits of the address to contain some of the bits
			
 
				+     * of the PIRQ#. Therefore we can't just send the write cycle and
			
 
				+     * trust that it's caught by the APIC at 0xfee00000 because the
			
 
				+     * target of the write might be e.g. 0x1000fee46000 for PIRQ#4166.
			
 
				+     * So we intercept the delivery here instead of in kvm_send_msi().
			
 
				+     */
			
 
				+    if (xen_mode == XEN_EMULATE &&
			
 
				+        xen_evtchn_deliver_pirq_msi(msg.address, msg.data)) {
			
 
				+        return;
			
 
				+    }
			
 
				     attrs.requester_id = pci_requester_id(dev);
			
 
				     address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
			
 
				                          attrs, NULL);
			
@@ -988,6 +1002,9 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
 
				     pci_get_bus(pci_dev)->devices[pci_dev->devfn] = NULL;
			
 
				     pci_config_free(pci_dev);
			
 
				 
			
 
				+    if (xen_mode == XEN_EMULATE) {
			
 
				+        xen_evtchn_remove_pci_device(pci_dev);
			
 
				+    }
			
 
				     if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) {
			
 
				         memory_region_del_subregion(&pci_dev->bus_master_container_region,
			
 
				                                     &pci_dev->bus_master_enable_region);
			
--- a/hw/xen/Kconfig
+++ b/hw/xen/Kconfig
@@ -0,0 +1,3 @@
 
				+config XEN_BUS
			
 
				+    bool
			
 
				+    default y if (XEN || XEN_EMU)
			
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -676,21 +676,30 @@ void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev)
 
				 }
			
 
				 /* -------------------------------------------------------------------- */
			
 
				 
			
 
				-int xen_be_init(void)
			
 
				+/*
				+ * Allow TYPE_XENSYSDEV as a dynamic sysbus device on the class of the
				+ * current machine.
				+ */
				+static void xen_set_dynamic_sysbus(void)
				+{
				+    MachineClass *mc = MACHINE_CLASS(object_get_class(qdev_get_machine()));
				+
				+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_XENSYSDEV);
				+}
			
 
				+
			
 
				+void xen_be_init(void)
			
 
				 {
			
 
				     xengnttab_handle *gnttabdev;
			
 
				 
			
 
				     xenstore = xs_daemon_open();
			
 
				     if (!xenstore) {
			
 
				         xen_pv_printf(NULL, 0, "can't connect to xenstored\n");
			
 
				-        return -1;
			
 
				+        exit(1);
			
 
				     }
			
 
				 
			
 
				     qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL);
			
 
				 
			
 
				     if (xen_xc == NULL || xen_fmem == NULL) {
			
 
				-        /* Check if xen_init() have been called */
			
 
				-        goto err;
			
 
				+        xen_pv_printf(NULL, 0, "Xen operations not set up\n");
			
 
				+        exit(1);
			
 
				     }
			
 
				 
			
 
				     gnttabdev = xengnttab_open(NULL, 0);
			
@@ -706,23 +715,16 @@ int xen_be_init(void)
 
				     xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus");
			
 
				     qbus_set_bus_hotplug_handler(xen_sysbus);
			
 
				 
			
 
				-    return 0;
			
 
				-
			
 
				-err:
			
 
				-    qemu_set_fd_handler(xs_fileno(xenstore), NULL, NULL, NULL);
			
 
				-    xs_daemon_close(xenstore);
			
 
				-    xenstore = NULL;
			
 
				-
			
 
				-    return -1;
			
 
				-}
			
 
				-
			
 
				-static void xen_set_dynamic_sysbus(void)
			
 
				-{
			
 
				-    Object *machine = qdev_get_machine();
			
 
				-    ObjectClass *oc = object_get_class(machine);
			
 
				-    MachineClass *mc = MACHINE_CLASS(oc);
			
 
				+    xen_set_dynamic_sysbus();
			
 
				 
			
 
				-    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_XENSYSDEV);
			
 
				+    xen_be_register("console", &xen_console_ops);
			
 
				+    xen_be_register("vkbd", &xen_kbdmouse_ops);
			
 
				+#ifdef CONFIG_VIRTFS
			
 
				+    xen_be_register("9pfs", &xen_9pfs_ops);
			
 
				+#endif
			
 
				+#ifdef CONFIG_USB_LIBUSB
			
 
				+    xen_be_register("qusb", &xen_usb_ops);
			
 
				+#endif
			
 
				 }
			
 
				 
			
 
				 int xen_be_register(const char *type, struct XenDevOps *ops)
			
@@ -744,20 +746,6 @@ int xen_be_register(const char *type, struct XenDevOps *ops)
 
				     return xenstore_scan(type, xen_domid, ops);
			
 
				 }
			
 
				 
			
 
				-void xen_be_register_common(void)
			
 
				-{
			
 
				-    xen_set_dynamic_sysbus();
			
 
				-
			
 
				-    xen_be_register("console", &xen_console_ops);
			
 
				-    xen_be_register("vkbd", &xen_kbdmouse_ops);
			
 
				-#ifdef CONFIG_VIRTFS
			
 
				-    xen_be_register("9pfs", &xen_9pfs_ops);
			
 
				-#endif
			
 
				-#ifdef CONFIG_USB_LIBUSB
			
 
				-    xen_be_register("qusb", &xen_usb_ops);
			
 
				-#endif
			
 
				-}
			
 
				-
			
 
				 int xen_be_bind_evtchn(struct XenLegacyDevice *xendev)
			
 
				 {
			
 
				     if (xendev->local_port != -1) {
			
--- a/hw/xenpv/xen_machine_pv.c
+++ b/hw/xenpv/xen_machine_pv.c
@@ -36,10 +36,7 @@ static void xen_init_pv(MachineState *machine)
 
				     int i;
			
 
				 
			
 
				     /* Initialize backend core & drivers */
			
 
				-    if (xen_be_init() != 0) {
			
 
				-        error_report("%s: xen backend core setup failed", __func__);
			
 
				-        exit(1);
			
 
				-    }
			
 
				+    xen_be_init();
			
 
				 
			
 
				     switch (xen_mode) {
			
 
				     case XEN_ATTACH:
			
@@ -55,7 +52,6 @@ static void xen_init_pv(MachineState *machine)
 
				         break;
			
 
				     }
			
 
				 
			
 
				-    xen_be_register_common();
			
 
				     xen_be_register("vfb", &xen_framebuffer_ops);
			
 
				     xen_be_register("qnic", &xen_netdev_ops);
			
 
				 
			
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -291,12 +291,15 @@ extern const size_t pc_compat_1_5_len;
 
				 extern GlobalProperty pc_compat_1_4[];
			
 
				 extern const size_t pc_compat_1_4_len;
			
 
				 
			
 
				+int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
			
 
				+
			
 
				 #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \
			
 
				     static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \
			
 
				     { \
			
 
				         MachineClass *mc = MACHINE_CLASS(oc); \
			
 
				         optsfn(mc); \
			
 
				         mc->init = initfn; \
			
 
				+        mc->kvm_type = pc_machine_kvm_type; \
			
 
				     } \
			
 
				     static const TypeInfo pc_machine_type_##suffix = { \
			
 
				         .name       = namestr TYPE_MACHINE_SUFFIX, \
			
--- a/include/hw/pci/msi.h
+++ b/include/hw/pci/msi.h
@@ -33,6 +33,7 @@ extern bool msi_nonbroken;
 
				 void msi_set_message(PCIDevice *dev, MSIMessage msg);
			
 
				 MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector);
			
 
				 bool msi_enabled(const PCIDevice *dev);
			
 
				+void msi_set_enabled(PCIDevice *dev);
			
 
				 int msi_init(struct PCIDevice *dev, uint8_t offset,
			
 
				              unsigned int nr_vectors, bool msi64bit,
			
 
				              bool msi_per_vector_mask, Error **errp);
			
--- a/include/hw/xen/interface/arch-arm.h
+++ b/include/hw/xen/interface/arch-arm.h
@@ -0,0 +1,510 @@
 
				+/******************************************************************************
			
 
				+ * arch-arm.h
			
 
				+ *
			
 
				+ * Guest OS interface to ARM Xen.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright 2011 (C) Citrix Systems
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_ARCH_ARM_H__
			
 
				+#define __XEN_PUBLIC_ARCH_ARM_H__
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 50 arm_abi Hypercall Calling Convention
			
 
				+ *
			
 
				+ * A hypercall is issued using the ARM HVC instruction.
			
 
				+ *
			
 
				+ * A hypercall can take up to 5 arguments. These are passed in
			
 
				+ * registers, the first argument in x0/r0 (for arm64/arm32 guests
			
 
				+ * respectively irrespective of whether the underlying hypervisor is
			
 
				+ * 32- or 64-bit), the second argument in x1/r1, the third in x2/r2,
			
 
				+ * the fourth in x3/r3 and the fifth in x4/r4.
			
 
				+ *
			
 
				+ * The hypercall number is passed in r12 (arm) or x16 (arm64). In both
			
 
				+ * cases the relevant ARM procedure calling convention specifies this
			
 
				+ * is an inter-procedure-call scratch register (e.g. for use in linker
			
 
				+ * stubs). This use does not conflict with use during a hypercall.
			
 
				+ *
			
 
				+ * The HVC ISS must contain a Xen specific TAG: XEN_HYPERCALL_TAG.
			
 
				+ *
			
 
				+ * The return value is in x0/r0.
			
 
				+ *
			
 
				+ * The hypercall will clobber x16/r12 and the argument registers used
			
 
				+ * by that hypercall (except r0 which is the return value) i.e. in
			
 
				+ * addition to x16/r12 a 2 argument hypercall will clobber x1/r1 and a
			
 
				+ * 4 argument hypercall will clobber x1/r1, x2/r2 and x3/r3.
			
 
				+ *
			
 
				+ * Parameter structs passed to hypercalls are laid out according to
			
 
				+ * the Procedure Call Standard for the ARM Architecture (AAPCS, AKA
			
 
				+ * EABI) and Procedure Call Standard for the ARM 64-bit Architecture
			
 
				+ * (AAPCS64). Where there is a conflict the 64-bit standard should be
			
 
				+ * used regardless of guest type. Structures which are passed as
			
 
				+ * hypercall arguments are always little endian.
			
 
				+ *
			
 
				+ * All memory which is shared with other entities in the system
			
 
				+ * (including the hypervisor and other guests) must reside in memory
			
 
				+ * which is mapped as Normal Inner Write-Back Outer Write-Back Inner-Shareable.
			
 
				+ * This applies to:
			
 
				+ *  - hypercall arguments passed via a pointer to guest memory.
			
 
				+ *  - memory shared via the grant table mechanism (including PV I/O
			
 
				+ *    rings etc).
			
 
				+ *  - memory shared with the hypervisor (struct shared_info, struct
			
 
				+ *    vcpu_info, the grant table, etc).
			
 
				+ *
			
 
				+ * Any cache allocation hints are acceptable.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 55 arm_hcall Supported Hypercalls
			
 
				+ *
			
 
				+ * Xen on ARM makes extensive use of hardware facilities and therefore
			
 
				+ * only a subset of the potential hypercalls is required.
			
 
				+ *
			
 
				+ * Since ARM uses second stage paging any machine/physical addresses
			
 
				+ * passed to hypercalls are Guest Physical Addresses (Intermediate
			
 
				+ * Physical Addresses) unless otherwise noted.
			
 
				+ *
			
 
				+ * The following hypercalls (and sub operations) are supported on the
			
 
				+ * ARM platform. Other hypercalls should be considered
			
 
				+ * unavailable/unsupported.
			
 
				+ *
			
 
				+ *  HYPERVISOR_memory_op
			
 
				+ *   All generic sub-operations
			
 
				+ *
			
 
				+ *  HYPERVISOR_domctl
			
 
				+ *   All generic sub-operations, with the exception of:
			
 
				+ *    * XEN_DOMCTL_irq_permission (not yet implemented)
			
 
				+ *
			
 
				+ *  HYPERVISOR_sched_op
			
 
				+ *   All generic sub-operations, with the exception of:
			
 
				+ *    * SCHEDOP_block -- prefer wfi hardware instruction
			
 
				+ *
			
 
				+ *  HYPERVISOR_console_io
			
 
				+ *   All generic sub-operations
			
 
				+ *
			
 
				+ *  HYPERVISOR_xen_version
			
 
				+ *   All generic sub-operations
			
 
				+ *
			
 
				+ *  HYPERVISOR_event_channel_op
			
 
				+ *   All generic sub-operations
			
 
				+ *
			
 
				+ *  HYPERVISOR_physdev_op
			
 
				+ *   Exactly these sub-operations are supported:
			
 
				+ *   PHYSDEVOP_pci_device_add
			
 
				+ *   PHYSDEVOP_pci_device_remove
			
 
				+ *
			
 
				+ *  HYPERVISOR_sysctl
			
 
				+ *   All generic sub-operations, with the exception of:
			
 
				+ *    * XEN_SYSCTL_page_offline_op
			
 
				+ *    * XEN_SYSCTL_get_pmstat
			
 
				+ *    * XEN_SYSCTL_pm_op
			
 
				+ *
			
 
				+ *  HYPERVISOR_hvm_op
			
 
				+ *   Exactly these sub-operations are supported:
			
 
				+ *    * HVMOP_set_param
			
 
				+ *    * HVMOP_get_param
			
 
				+ *
			
 
				+ *  HYPERVISOR_grant_table_op
			
 
				+ *   All generic sub-operations
			
 
				+ *
			
 
				+ *  HYPERVISOR_vcpu_op
			
 
				+ *   Exactly these sub-operations are supported:
			
 
				+ *    * VCPUOP_register_vcpu_info
			
 
				+ *    * VCPUOP_register_runstate_memory_area
			
 
				+ *
			
 
				+ *  HYPERVISOR_argo_op
			
 
				+ *   All generic sub-operations
			
 
				+ *
			
 
				+ * Other notes on the ARM ABI:
			
 
				+ *
			
 
				+ * - struct start_info is not exported to ARM guests.
			
 
				+ *
			
 
				+ * - struct shared_info is mapped by ARM guests using the
			
 
				+ *   HYPERVISOR_memory_op sub-op XENMEM_add_to_physmap, passing
			
 
				+ *   XENMAPSPACE_shared_info as space parameter.
			
 
				+ *
			
 
				+ * - All the per-cpu struct vcpu_info are mapped by ARM guests using the
			
 
				+ *   HYPERVISOR_vcpu_op sub-op VCPUOP_register_vcpu_info, including cpu0
			
 
				+ *   struct vcpu_info.
			
 
				+ *
			
 
				+ * - The grant table is mapped using the HYPERVISOR_memory_op sub-op
			
 
				+ *   XENMEM_add_to_physmap, passing XENMAPSPACE_grant_table as space
			
 
				+ *   parameter. The memory range specified under the Xen compatible
			
 
				+ *   hypervisor node on device tree can be used as target gpfn for the
			
 
				+ *   mapping.
			
 
				+ *
			
 
				+ * - Xenstore is initialized by using the two hvm_params
			
 
				+ *   HVM_PARAM_STORE_PFN and HVM_PARAM_STORE_EVTCHN. They can be read
			
 
				+ *   with the HYPERVISOR_hvm_op sub-op HVMOP_get_param.
			
 
				+ *
			
 
				+ * - The paravirtualized console is initialized by using the two
			
 
				+ *   hvm_params HVM_PARAM_CONSOLE_PFN and HVM_PARAM_CONSOLE_EVTCHN. They
			
 
				+ *   can be read with the HYPERVISOR_hvm_op sub-op HVMOP_get_param.
			
 
				+ *
			
 
				+ * - Event channel notifications are delivered using the percpu GIC
			
 
				+ *   interrupt specified under the Xen compatible hypervisor node on
			
 
				+ *   device tree.
			
 
				+ *
			
 
				+ * - The device tree Xen compatible node is fully described under Linux
			
 
				+ *   at Documentation/devicetree/bindings/arm/xen.txt.
			
 
				+ */
			
 
				+
			
 
				+#define XEN_HYPERCALL_TAG   0XEA1
			
 
				+
			
 
				+#define  int64_aligned_t  int64_t __attribute__((aligned(8)))
			
 
				+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+#define ___DEFINE_XEN_GUEST_HANDLE(name, type)                  \
			
 
				+    typedef union { type *p; unsigned long q; }                 \
			
 
				+        __guest_handle_ ## name;                                \
			
 
				+    typedef union { type *p; uint64_aligned_t q; }              \
			
 
				+        __guest_handle_64_ ## name
			
 
				+
			
 
				+/*
			
 
				+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
			
 
				+ * in a struct in memory. On ARM it is always 8 bytes in size and 8-byte
			
 
				+ * aligned.
			
 
				+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
			
 
				+ * hypercall argument. It is 4 bytes on aarch32 and 8 bytes on aarch64.
			
 
				+ */
			
 
				+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
			
 
				+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
			
 
				+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
			
 
				+#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
			
 
				+#define __XEN_GUEST_HANDLE(name)        __guest_handle_64_ ## name
			
 
				+#define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
			
 
				+#define XEN_GUEST_HANDLE_PARAM(name)    __guest_handle_ ## name
			
 
				+#define set_xen_guest_handle_raw(hnd, val)                  \
			
 
				+    do {                                                    \
			
 
				+        __typeof__(&(hnd)) _sxghr_tmp = &(hnd);             \
			
 
				+        _sxghr_tmp->q = 0;                                  \
			
 
				+        _sxghr_tmp->p = val;                                \
			
 
				+    } while ( 0 )
			
 
				+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
			
 
				+
			
 
				+typedef uint64_t xen_pfn_t;
			
 
				+#define PRI_xen_pfn PRIx64
			
 
				+#define PRIu_xen_pfn PRIu64
			
 
				+
			
 
				+/*
			
 
				+ * Maximum number of virtual CPUs in legacy multi-processor guests.
			
 
				+ * Only one. All other VCPUS must use VCPUOP_register_vcpu_info.
			
 
				+ */
			
 
				+#define XEN_LEGACY_MAX_VCPUS 1
			
 
				+
			
 
				+typedef uint64_t xen_ulong_t;
			
 
				+#define PRI_xen_ulong PRIx64
			
 
				+
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
			
 
				+/* Anonymous union includes both 32- and 64-bit names (e.g., r0/x0). */
			
 
				+# define __DECL_REG(n64, n32) union {          \
			
 
				+        uint64_t n64;                          \
			
 
				+        uint32_t n32;                          \
			
 
				+    }
			
 
				+#else
			
 
				+/* Non-gcc sources must always use the proper 64-bit name (e.g., x0). */
			
 
				+#define __DECL_REG(n64, n32) uint64_t n64
			
 
				+#endif
			
 
				+
			
 
				+struct vcpu_guest_core_regs
			
 
				+{
			
 
				+    /*         Aarch64       Aarch32 */
			
 
				+    __DECL_REG(x0,           r0_usr);
			
 
				+    __DECL_REG(x1,           r1_usr);
			
 
				+    __DECL_REG(x2,           r2_usr);
			
 
				+    __DECL_REG(x3,           r3_usr);
			
 
				+    __DECL_REG(x4,           r4_usr);
			
 
				+    __DECL_REG(x5,           r5_usr);
			
 
				+    __DECL_REG(x6,           r6_usr);
			
 
				+    __DECL_REG(x7,           r7_usr);
			
 
				+    __DECL_REG(x8,           r8_usr);
			
 
				+    __DECL_REG(x9,           r9_usr);
			
 
				+    __DECL_REG(x10,          r10_usr);
			
 
				+    __DECL_REG(x11,          r11_usr);
			
 
				+    __DECL_REG(x12,          r12_usr);
			
 
				+
			
 
				+    __DECL_REG(x13,          sp_usr);
			
 
				+    __DECL_REG(x14,          lr_usr);
			
 
				+
			
 
				+    __DECL_REG(x15,          __unused_sp_hyp);
			
 
				+
			
 
				+    __DECL_REG(x16,          lr_irq);
			
 
				+    __DECL_REG(x17,          sp_irq);
			
 
				+
			
 
				+    __DECL_REG(x18,          lr_svc);
			
 
				+    __DECL_REG(x19,          sp_svc);
			
 
				+
			
 
				+    __DECL_REG(x20,          lr_abt);
			
 
				+    __DECL_REG(x21,          sp_abt);
			
 
				+
			
 
				+    __DECL_REG(x22,          lr_und);
			
 
				+    __DECL_REG(x23,          sp_und);
			
 
				+
			
 
				+    __DECL_REG(x24,          r8_fiq);
			
 
				+    __DECL_REG(x25,          r9_fiq);
			
 
				+    __DECL_REG(x26,          r10_fiq);
			
 
				+    __DECL_REG(x27,          r11_fiq);
			
 
				+    __DECL_REG(x28,          r12_fiq);
			
 
				+
			
 
				+    __DECL_REG(x29,          sp_fiq);
			
 
				+    __DECL_REG(x30,          lr_fiq);
			
 
				+
			
 
				+    /* Return address and mode */
			
 
				+    __DECL_REG(pc64,         pc32);             /* ELR_EL2 */
			
 
				+    uint64_t cpsr;                              /* SPSR_EL2 */
			
 
				+
			
 
				+    union {
			
 
				+        uint64_t spsr_el1;       /* AArch64 */
			
 
				+        uint32_t spsr_svc;       /* AArch32 */
			
 
				+    };
			
 
				+
			
 
				+    /* AArch32 guests only */
			
 
				+    uint32_t spsr_fiq, spsr_irq, spsr_und, spsr_abt;
			
 
				+
			
 
				+    /* AArch64 guests only */
			
 
				+    uint64_t sp_el0;
			
 
				+    uint64_t sp_el1, elr_el1;
			
 
				+};
			
 
				+typedef struct vcpu_guest_core_regs vcpu_guest_core_regs_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_core_regs_t);
			
 
				+
			
 
				+#undef __DECL_REG
			
 
				+
			
 
				+struct vcpu_guest_context {
			
 
				+#define _VGCF_online                   0
			
 
				+#define VGCF_online                    (1<<_VGCF_online)
			
 
				+    uint32_t flags;                         /* VGCF_* */
			
 
				+
			
 
				+    struct vcpu_guest_core_regs user_regs;  /* Core CPU registers */
			
 
				+
			
 
				+    uint64_t sctlr;
			
 
				+    uint64_t ttbcr, ttbr0, ttbr1;
			
 
				+};
			
 
				+typedef struct vcpu_guest_context vcpu_guest_context_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
			
 
				+
			
 
				+/*
			
 
				+ * struct xen_arch_domainconfig's ABI is covered by
			
 
				+ * XEN_DOMCTL_INTERFACE_VERSION.
			
 
				+ */
			
 
				+#define XEN_DOMCTL_CONFIG_GIC_NATIVE    0
			
 
				+#define XEN_DOMCTL_CONFIG_GIC_V2        1
			
 
				+#define XEN_DOMCTL_CONFIG_GIC_V3        2
			
 
				+
			
 
				+#define XEN_DOMCTL_CONFIG_TEE_NONE      0
			
 
				+#define XEN_DOMCTL_CONFIG_TEE_OPTEE     1
			
 
				+
			
 
				+struct xen_arch_domainconfig {
			
 
				+    /* IN/OUT */
			
 
				+    uint8_t gic_version;
			
 
				+    /* IN */
			
 
				+    uint16_t tee_type;
			
 
				+    /* IN */
			
 
				+    uint32_t nr_spis;
			
 
				+    /*
			
 
				+     * OUT
			
 
				+     * Based on the property clock-frequency in the DT timer node.
			
 
				+     * The property may be present when the bootloader/firmware doesn't
			
 
				+     * correctly set CNTFRQ, which holds the timer frequency.
			
 
				+     *
			
 
				+     * As it's not possible to trap this register, we have to replicate
			
 
				+     * the value in the guest DT.
			
 
				+     *
			
 
				+     * = 0 => property not present
			
 
				+     * > 0 => Value of the property
			
 
				+     *
			
 
				+     */
			
 
				+    uint32_t clock_frequency;
			
 
				+};
			
 
				+#endif /* __XEN__ || __XEN_TOOLS__ */
			
 
				+
			
 
				+struct arch_vcpu_info {
			
 
				+};
			
 
				+typedef struct arch_vcpu_info arch_vcpu_info_t;
			
 
				+
			
 
				+struct arch_shared_info {
			
 
				+};
			
 
				+typedef struct arch_shared_info arch_shared_info_t;
			
 
				+typedef uint64_t xen_callback_t;
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+
			
 
				+/* PSR bits (CPSR, SPSR) */
			
 
				+
			
 
				+#define PSR_THUMB       (1<<5)        /* Thumb Mode enable */
			
 
				+#define PSR_FIQ_MASK    (1<<6)        /* Fast Interrupt mask */
			
 
				+#define PSR_IRQ_MASK    (1<<7)        /* Interrupt mask */
			
 
				+#define PSR_ABT_MASK    (1<<8)        /* Asynchronous Abort mask */
			
 
				+#define PSR_BIG_ENDIAN  (1<<9)        /* arm32: Big Endian Mode */
			
 
				+#define PSR_DBG_MASK    (1<<9)        /* arm64: Debug Exception mask */
			
 
				+#define PSR_IT_MASK     (0x0600fc00)  /* Thumb If-Then Mask */
			
 
				+#define PSR_JAZELLE     (1<<24)       /* Jazelle Mode */
			
 
				+
			
 
				+/* 32 bit modes */
			
 
				+#define PSR_MODE_USR 0x10
			
 
				+#define PSR_MODE_FIQ 0x11
			
 
				+#define PSR_MODE_IRQ 0x12
			
 
				+#define PSR_MODE_SVC 0x13
			
 
				+#define PSR_MODE_MON 0x16
			
 
				+#define PSR_MODE_ABT 0x17
			
 
				+#define PSR_MODE_HYP 0x1a
			
 
				+#define PSR_MODE_UND 0x1b
			
 
				+#define PSR_MODE_SYS 0x1f
			
 
				+
			
 
				+/* 64 bit modes */
			
 
				+#define PSR_MODE_BIT  0x10 /* Set iff AArch32 */
			
 
				+#define PSR_MODE_EL3h 0x0d
			
 
				+#define PSR_MODE_EL3t 0x0c
			
 
				+#define PSR_MODE_EL2h 0x09
			
 
				+#define PSR_MODE_EL2t 0x08
			
 
				+#define PSR_MODE_EL1h 0x05
			
 
				+#define PSR_MODE_EL1t 0x04
			
 
				+#define PSR_MODE_EL0t 0x00
			
 
				+
			
 
				+#define PSR_GUEST32_INIT  (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC)
			
 
				+#define PSR_GUEST64_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_EL1h)
			
 
				+
			
 
				+#define SCTLR_GUEST_INIT    xen_mk_ullong(0x00c50078)
			
 
				+
			
 
				+/*
			
 
				+ * Virtual machine platform (memory layout, interrupts)
			
 
				+ *
			
 
				+ * These are defined for consistency between the tools and the
			
 
				+ * hypervisor. Guests must not rely on these hardcoded values but
			
 
				+ * should instead use the FDT.
			
 
				+ */
			
 
				+
			
 
				+/* Physical Address Space */
			
 
				+
			
 
				+/*
			
 
				+ * vGIC mappings: Only one set of mappings is used by the guest.
			
 
				+ * Therefore they can overlap.
			
 
				+ */
			
 
				+
			
 
				+/* vGIC v2 mappings */
			
 
				+#define GUEST_GICD_BASE   xen_mk_ullong(0x03001000)
			
 
				+#define GUEST_GICD_SIZE   xen_mk_ullong(0x00001000)
			
 
				+#define GUEST_GICC_BASE   xen_mk_ullong(0x03002000)
			
 
				+#define GUEST_GICC_SIZE   xen_mk_ullong(0x00002000)
			
 
				+
			
 
				+/* vGIC v3 mappings */
			
 
				+#define GUEST_GICV3_GICD_BASE      xen_mk_ullong(0x03001000)
			
 
				+#define GUEST_GICV3_GICD_SIZE      xen_mk_ullong(0x00010000)
			
 
				+
			
 
				+#define GUEST_GICV3_RDIST_REGIONS  1
			
 
				+
			
 
				+#define GUEST_GICV3_GICR0_BASE     xen_mk_ullong(0x03020000) /* vCPU0..127 */
			
 
				+#define GUEST_GICV3_GICR0_SIZE     xen_mk_ullong(0x01000000)
			
 
				+
			
 
				+/*
			
 
				+ * 256 MB is reserved for VPCI configuration space based on calculation
			
 
				+ * 256 buses x 32 devices x 8 functions x 4 KB = 256 MB
			
 
				+ */
			
 
				+#define GUEST_VPCI_ECAM_BASE    xen_mk_ullong(0x10000000)
			
 
				+#define GUEST_VPCI_ECAM_SIZE    xen_mk_ullong(0x10000000)
			
 
				+
			
 
				+/* ACPI tables physical address */
			
 
				+#define GUEST_ACPI_BASE xen_mk_ullong(0x20000000)
			
 
				+#define GUEST_ACPI_SIZE xen_mk_ullong(0x02000000)
			
 
				+
			
 
				+/* PL011 mappings */
			
 
				+#define GUEST_PL011_BASE    xen_mk_ullong(0x22000000)
			
 
				+#define GUEST_PL011_SIZE    xen_mk_ullong(0x00001000)
			
 
				+
			
 
				+/* Guest PCI-PCIe memory space where config space and BAR will be available.*/
			
 
				+#define GUEST_VPCI_ADDR_TYPE_MEM            xen_mk_ullong(0x02000000)
			
 
				+#define GUEST_VPCI_MEM_ADDR                 xen_mk_ullong(0x23000000)
			
 
				+#define GUEST_VPCI_MEM_SIZE                 xen_mk_ullong(0x10000000)
			
 
				+
			
 
				+/*
			
 
				+ * 16MB == 4096 pages reserved for guest to use as a region to map its
			
 
				+ * grant table in.
			
 
				+ */
			
 
				+#define GUEST_GNTTAB_BASE xen_mk_ullong(0x38000000)
			
 
				+#define GUEST_GNTTAB_SIZE xen_mk_ullong(0x01000000)
			
 
				+
			
 
				+#define GUEST_MAGIC_BASE  xen_mk_ullong(0x39000000)
			
 
				+#define GUEST_MAGIC_SIZE  xen_mk_ullong(0x01000000)
			
 
				+
			
 
				+#define GUEST_RAM_BANKS   2
			
 
				+
			
 
				+/*
			
 
				+ * The way to find the extended regions (to be exposed to the guest as unused
			
 
				+ * address space) relies on the fact that the regions reserved for the RAM
			
 
				+ * below are big enough to also accommodate such regions.
			
 
				+ */
			
 
				+#define GUEST_RAM0_BASE   xen_mk_ullong(0x40000000) /* 3GB of low RAM @ 1GB */
			
 
				+#define GUEST_RAM0_SIZE   xen_mk_ullong(0xc0000000)
			
 
				+
			
 
				+/* 4GB @ 4GB Prefetch Memory for VPCI */
			
 
				+#define GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM   xen_mk_ullong(0x42000000)
			
 
				+#define GUEST_VPCI_PREFETCH_MEM_ADDR        xen_mk_ullong(0x100000000)
			
 
				+#define GUEST_VPCI_PREFETCH_MEM_SIZE        xen_mk_ullong(0x100000000)
			
 
				+
			
 
				+#define GUEST_RAM1_BASE   xen_mk_ullong(0x0200000000) /* 1016GB of RAM @ 8GB */
			
 
				+#define GUEST_RAM1_SIZE   xen_mk_ullong(0xfe00000000)
			
 
				+
			
 
				+#define GUEST_RAM_BASE    GUEST_RAM0_BASE /* Lowest RAM address */
			
 
				+/* Largest amount of actual RAM, not including holes */
			
 
				+#define GUEST_RAM_MAX     (GUEST_RAM0_SIZE + GUEST_RAM1_SIZE)
			
 
				+/* Suitable for e.g. const uint64_t ramfoo[] = GUEST_RAM_BANK_FOOS; */
			
 
				+#define GUEST_RAM_BANK_BASES   { GUEST_RAM0_BASE, GUEST_RAM1_BASE }
			
 
				+#define GUEST_RAM_BANK_SIZES   { GUEST_RAM0_SIZE, GUEST_RAM1_SIZE }
			
 
				+
			
 
				+/* Current supported guest VCPUs */
			
 
				+#define GUEST_MAX_VCPUS 128
			
 
				+
			
 
				+/* Interrupts */
			
 
				+#define GUEST_TIMER_VIRT_PPI    27
			
 
				+#define GUEST_TIMER_PHYS_S_PPI  29
			
 
				+#define GUEST_TIMER_PHYS_NS_PPI 30
			
 
				+#define GUEST_EVTCHN_PPI        31
			
 
				+
			
 
				+#define GUEST_VPL011_SPI        32
			
 
				+
			
 
				+/* PSCI functions */
			
 
				+#define PSCI_cpu_suspend 0
			
 
				+#define PSCI_cpu_off     1
			
 
				+#define PSCI_cpu_on      2
			
 
				+#define PSCI_migrate     3
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+/* Stub definition of PMU structure */
			
 
				+typedef struct xen_pmu_arch { uint8_t dummy; } xen_pmu_arch_t;
			
 
				+#endif
			
 
				+
			
 
				+#endif /*  __XEN_PUBLIC_ARCH_ARM_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/arch-x86/cpuid.h
+++ b/include/hw/xen/interface/arch-x86/cpuid.h
@@ -0,0 +1,118 @@
 
				+/******************************************************************************
			
 
				+ * arch-x86/cpuid.h
			
 
				+ *
			
 
				+ * CPUID interface to Xen.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2007 Citrix Systems, Inc.
			
 
				+ *
			
 
				+ * Authors:
			
 
				+ *    Keir Fraser <keir@xen.org>
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
			
 
				+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
			
 
				+
			
 
				+/*
			
 
				+ * For compatibility with other hypervisor interfaces, the Xen cpuid leaves
			
 
				+ * can be found at the first otherwise unused 0x100 aligned boundary starting
			
 
				+ * from 0x40000000.
			
 
				+ *
			
 
				+ * e.g. if Viridian extensions are enabled for an HVM domain, the Xen cpuid
			
 
				+ * leaves will start at 0x40000100
			
 
				+ */
			
 
				+
			
 
				+#define XEN_CPUID_FIRST_LEAF 0x40000000
			
 
				+#define XEN_CPUID_LEAF(i)    (XEN_CPUID_FIRST_LEAF + (i))
			
 
				+
			
 
				+/*
			
 
				+ * Leaf 1 (0x40000x00)
			
 
				+ * EAX: Largest Xen-information leaf. All leaves up to and including @EAX
			
 
				+ *      are supported by the Xen host.
			
 
				+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
			
 
				+ *      of a Xen host.
			
 
				+ */
			
 
				+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
			
 
				+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
			
 
				+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
			
 
				+
			
 
				+/*
			
 
				+ * Leaf 2 (0x40000x01)
			
 
				+ * EAX[31:16]: Xen major version.
			
 
				+ * EAX[15: 0]: Xen minor version.
			
 
				+ * EBX-EDX: Reserved (currently all zeroes).
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Leaf 3 (0x40000x02)
			
 
				+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
			
 
				+ *      to specify one hypercall page.
			
 
				+ * EBX: Base address of Xen-specific MSRs.
			
 
				+ * ECX: Features 1. Unused bits are set to zero.
			
 
				+ * EDX: Features 2. Unused bits are set to zero.
			
 
				+ */
			
 
				+
			
 
				+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
			
 
				+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
			
 
				+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
			
 
				+
			
 
				+/*
			
 
				+ * Leaf 4 (0x40000x03)
			
 
				+ * Sub-leaf 0: EAX: bit 0: emulated tsc
			
 
				+ *                  bit 1: host tsc is known to be reliable
			
 
				+ *                  bit 2: RDTSCP instruction available
			
 
				+ *             EBX: tsc_mode: 0=default (emulate if necessary), 1=emulate,
			
 
				+ *                            2=no emulation, 3=no emulation + TSC_AUX support
			
 
				+ *             ECX: guest tsc frequency in kHz
			
 
				+ *             EDX: guest tsc incarnation (migration count)
			
 
				+ * Sub-leaf 1: EAX: tsc offset low part
			
 
				+ *             EBX: tsc offset high part
			
 
				+ *             ECX: multiplier for tsc->ns conversion
			
 
				+ *             EDX: shift amount for tsc->ns conversion
			
 
				+ * Sub-leaf 2: EAX: host tsc frequency in kHz
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Leaf 5 (0x40000x04)
			
 
				+ * HVM-specific features
			
 
				+ * Sub-leaf 0: EAX: Features
			
 
				+ * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
			
 
				+ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag)
			
 
				+ */
			
 
				+#define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */
			
 
				+#define XEN_HVM_CPUID_X2APIC_VIRT      (1u << 1) /* Virtualized x2APIC accesses */
			
 
				+/* Memory mapped from other domains has valid IOMMU entries */
			
 
				+#define XEN_HVM_CPUID_IOMMU_MAPPINGS   (1u << 2)
			
 
				+#define XEN_HVM_CPUID_VCPU_ID_PRESENT  (1u << 3) /* vcpu id is present in EBX */
			
 
				+#define XEN_HVM_CPUID_DOMID_PRESENT    (1u << 4) /* domid is present in ECX */
			
 
				+
			
 
				+/*
			
 
				+ * Leaf 6 (0x40000x05)
			
 
				+ * PV-specific parameters
			
 
				+ * Sub-leaf 0: EAX: max available sub-leaf
			
 
				+ * Sub-leaf 0: EBX: bits 0-7: max machine address width
			
 
				+ */
			
 
				+
			
 
				+/* Max. address width in bits taking memory hotplug into account. */
			
 
				+#define XEN_CPUID_MACHINE_ADDRESS_WIDTH_MASK (0xffu << 0)
			
 
				+
			
 
				+#define XEN_CPUID_MAX_NUM_LEAVES 5
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
			
--- a/include/hw/xen/interface/arch-x86/xen-x86_32.h
+++ b/include/hw/xen/interface/arch-x86/xen-x86_32.h
@@ -0,0 +1,194 @@
 
				+/******************************************************************************
			
 
				+ * xen-x86_32.h
			
 
				+ *
			
 
				+ * Guest OS interface to x86 32-bit Xen.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2004-2007, K A Fraser
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
			
 
				+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
			
 
				+
			
 
				+/*
			
 
				+ * Hypercall interface:
			
 
				+ *  Input:  %ebx, %ecx, %edx, %esi, %edi, %ebp (arguments 1-6)
			
 
				+ *  Output: %eax
			
 
				+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
			
 
				+ *  call hypercall_page + hypercall-number * 32
			
 
				+ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx)
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * These flat segments are in the Xen-private section of every GDT. Since these
			
 
				+ * are also present in the initial GDT, many OSes will be able to avoid
			
 
				+ * installing their own GDT.
			
 
				+ */
			
 
				+#define FLAT_RING1_CS 0xe019    /* GDT index 259 */
			
 
				+#define FLAT_RING1_DS 0xe021    /* GDT index 260 */
			
 
				+#define FLAT_RING1_SS 0xe021    /* GDT index 260 */
			
 
				+#define FLAT_RING3_CS 0xe02b    /* GDT index 261 */
			
 
				+#define FLAT_RING3_DS 0xe033    /* GDT index 262 */
			
 
				+#define FLAT_RING3_SS 0xe033    /* GDT index 262 */
			
 
				+
			
 
				+#define FLAT_KERNEL_CS FLAT_RING1_CS
			
 
				+#define FLAT_KERNEL_DS FLAT_RING1_DS
			
 
				+#define FLAT_KERNEL_SS FLAT_RING1_SS
			
 
				+#define FLAT_USER_CS    FLAT_RING3_CS
			
 
				+#define FLAT_USER_DS    FLAT_RING3_DS
			
 
				+#define FLAT_USER_SS    FLAT_RING3_SS
			
 
				+
			
 
				+#define __HYPERVISOR_VIRT_START_PAE    0xF5800000
			
 
				+#define __MACH2PHYS_VIRT_START_PAE     0xF5800000
			
 
				+#define __MACH2PHYS_VIRT_END_PAE       0xF6800000
			
 
				+#define HYPERVISOR_VIRT_START_PAE      xen_mk_ulong(__HYPERVISOR_VIRT_START_PAE)
			
 
				+#define MACH2PHYS_VIRT_START_PAE       xen_mk_ulong(__MACH2PHYS_VIRT_START_PAE)
			
 
				+#define MACH2PHYS_VIRT_END_PAE         xen_mk_ulong(__MACH2PHYS_VIRT_END_PAE)
			
 
				+
			
 
				+/* Non-PAE bounds are obsolete. */
			
 
				+#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000
			
 
				+#define __MACH2PHYS_VIRT_START_NONPAE  0xFC000000
			
 
				+#define __MACH2PHYS_VIRT_END_NONPAE    0xFC400000
			
 
				+#define HYPERVISOR_VIRT_START_NONPAE   \
			
 
				+    xen_mk_ulong(__HYPERVISOR_VIRT_START_NONPAE)
			
 
				+#define MACH2PHYS_VIRT_START_NONPAE    \
			
 
				+    xen_mk_ulong(__MACH2PHYS_VIRT_START_NONPAE)
			
 
				+#define MACH2PHYS_VIRT_END_NONPAE      \
			
 
				+    xen_mk_ulong(__MACH2PHYS_VIRT_END_NONPAE)
			
 
				+
			
 
				+#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE
			
 
				+#define __MACH2PHYS_VIRT_START  __MACH2PHYS_VIRT_START_PAE
			
 
				+#define __MACH2PHYS_VIRT_END    __MACH2PHYS_VIRT_END_PAE
			
 
				+
			
 
				+#ifndef HYPERVISOR_VIRT_START
			
 
				+#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START)
			
 
				+#endif
			
 
				+
			
 
				+#define MACH2PHYS_VIRT_START  xen_mk_ulong(__MACH2PHYS_VIRT_START)
			
 
				+#define MACH2PHYS_VIRT_END    xen_mk_ulong(__MACH2PHYS_VIRT_END)
			
 
				+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
			
 
				+#ifndef machine_to_phys_mapping
			
 
				+#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
			
 
				+#endif
			
 
				+
			
 
				+/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+#undef ___DEFINE_XEN_GUEST_HANDLE
			
 
				+#define ___DEFINE_XEN_GUEST_HANDLE(name, type)                  \
			
 
				+    typedef struct { type *p; }                                 \
			
 
				+        __guest_handle_ ## name;                                \
			
 
				+    typedef struct { union { type *p; uint64_aligned_t q; }; }  \
			
 
				+        __guest_handle_64_ ## name
			
 
				+#undef set_xen_guest_handle_raw
			
 
				+#define set_xen_guest_handle_raw(hnd, val)                  \
			
 
				+    do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0;   \
			
 
				+         (hnd).p = val;                                     \
			
 
				+    } while ( 0 )
			
 
				+#define  int64_aligned_t  int64_t __attribute__((aligned(8)))
			
 
				+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
			
 
				+#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
			
 
				+#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name)
			
 
				+#endif
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+
			
 
				+#if defined(XEN_GENERATING_COMPAT_HEADERS)
			
 
				+/* nothing */
			
 
				+#elif defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+/* Anonymous unions include all permissible names (e.g., al/ah/ax/eax). */
			
 
				+#define __DECL_REG_LO8(which) union { \
			
 
				+    uint32_t e ## which ## x; \
			
 
				+    uint16_t which ## x; \
			
 
				+    struct { \
			
 
				+        uint8_t which ## l; \
			
 
				+        uint8_t which ## h; \
			
 
				+    }; \
			
 
				+}
			
 
				+#define __DECL_REG_LO16(name) union { \
			
 
				+    uint32_t e ## name, _e ## name; \
			
 
				+    uint16_t name; \
			
 
				+}
			
 
				+#else
			
 
				+/* Other sources must always use the proper 32-bit name (e.g., eax). */
			
 
				+#define __DECL_REG_LO8(which) uint32_t e ## which ## x
			
 
				+#define __DECL_REG_LO16(name) uint32_t e ## name
			
 
				+#endif
			
 
				+
			
 
				+struct cpu_user_regs {
			
 
				+    __DECL_REG_LO8(b);
			
 
				+    __DECL_REG_LO8(c);
			
 
				+    __DECL_REG_LO8(d);
			
 
				+    __DECL_REG_LO16(si);
			
 
				+    __DECL_REG_LO16(di);
			
 
				+    __DECL_REG_LO16(bp);
			
 
				+    __DECL_REG_LO8(a);
			
 
				+    uint16_t error_code;    /* private */
			
 
				+    uint16_t entry_vector;  /* private */
			
 
				+    __DECL_REG_LO16(ip);
			
 
				+    uint16_t cs;
			
 
				+    uint8_t  saved_upcall_mask;
			
 
				+    uint8_t  _pad0;
			
 
				+    __DECL_REG_LO16(flags); /* eflags.IF == !saved_upcall_mask */
			
 
				+    __DECL_REG_LO16(sp);
			
 
				+    uint16_t ss, _pad1;
			
 
				+    uint16_t es, _pad2;
			
 
				+    uint16_t ds, _pad3;
			
 
				+    uint16_t fs, _pad4;
			
 
				+    uint16_t gs, _pad5;
			
 
				+};
			
 
				+typedef struct cpu_user_regs cpu_user_regs_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
			
 
				+
			
 
				+#undef __DECL_REG_LO8
			
 
				+#undef __DECL_REG_LO16
			
 
				+
			
 
				+/*
			
 
				+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
			
 
				+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
			
 
				+ * must use the following accessor macros to pack/unpack valid MFNs.
			
 
				+ */
			
 
				+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
			
 
				+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
			
 
				+
			
 
				+struct arch_vcpu_info {
			
 
				+    unsigned long cr2;
			
 
				+    unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
			
 
				+};
			
 
				+typedef struct arch_vcpu_info arch_vcpu_info_t;
			
 
				+
			
 
				+struct xen_callback {
			
 
				+    unsigned long cs;
			
 
				+    unsigned long eip;
			
 
				+};
			
 
				+typedef struct xen_callback xen_callback_t;
			
 
				+
			
 
				+#endif /* !__ASSEMBLY__ */
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/arch-x86/xen-x86_64.h
+++ b/include/hw/xen/interface/arch-x86/xen-x86_64.h
@@ -0,0 +1,241 @@
 
				+/******************************************************************************
			
 
				+ * xen-x86_64.h
			
 
				+ *
			
 
				+ * Guest OS interface to x86 64-bit Xen.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2004-2006, K A Fraser
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
			
 
				+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
			
 
				+
			
 
				+/*
			
 
				+ * Hypercall interface:
			
 
				+ *  Input:  %rdi, %rsi, %rdx, %r10, %r8, %r9 (arguments 1-6)
			
 
				+ *  Output: %rax
			
 
				+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
			
 
				+ *  call hypercall_page + hypercall-number * 32
			
 
				+ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi)
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * 64-bit segment selectors
			
 
				+ * These flat segments are in the Xen-private section of every GDT. Since these
			
 
				+ * are also present in the initial GDT, many OSes will be able to avoid
			
 
				+ * installing their own GDT.
			
 
				+ */
			
 
				+
			
 
				+#define FLAT_RING3_CS32 0xe023  /* GDT index 260 */
			
 
				+#define FLAT_RING3_CS64 0xe033  /* GDT index 262 */
			
 
				+#define FLAT_RING3_DS32 0xe02b  /* GDT index 261 */
			
 
				+#define FLAT_RING3_DS64 0x0000  /* NULL selector */
			
 
				+#define FLAT_RING3_SS32 0xe02b  /* GDT index 261 */
			
 
				+#define FLAT_RING3_SS64 0xe02b  /* GDT index 261 */
			
 
				+
			
 
				+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
			
 
				+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
			
 
				+#define FLAT_KERNEL_DS   FLAT_KERNEL_DS64
			
 
				+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
			
 
				+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
			
 
				+#define FLAT_KERNEL_CS   FLAT_KERNEL_CS64
			
 
				+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
			
 
				+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
			
 
				+#define FLAT_KERNEL_SS   FLAT_KERNEL_SS64
			
 
				+
			
 
				+#define FLAT_USER_DS64 FLAT_RING3_DS64
			
 
				+#define FLAT_USER_DS32 FLAT_RING3_DS32
			
 
				+#define FLAT_USER_DS   FLAT_USER_DS64
			
 
				+#define FLAT_USER_CS64 FLAT_RING3_CS64
			
 
				+#define FLAT_USER_CS32 FLAT_RING3_CS32
			
 
				+#define FLAT_USER_CS   FLAT_USER_CS64
			
 
				+#define FLAT_USER_SS64 FLAT_RING3_SS64
			
 
				+#define FLAT_USER_SS32 FLAT_RING3_SS32
			
 
				+#define FLAT_USER_SS   FLAT_USER_SS64
			
 
				+
			
 
				+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
			
 
				+#define __HYPERVISOR_VIRT_END   0xFFFF880000000000
			
 
				+#define __MACH2PHYS_VIRT_START  0xFFFF800000000000
			
 
				+#define __MACH2PHYS_VIRT_END    0xFFFF804000000000
			
 
				+
			
 
				+#ifndef HYPERVISOR_VIRT_START
			
 
				+#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START)
			
 
				+#define HYPERVISOR_VIRT_END   xen_mk_ulong(__HYPERVISOR_VIRT_END)
			
 
				+#endif
			
 
				+
			
 
				+#define MACH2PHYS_VIRT_START  xen_mk_ulong(__MACH2PHYS_VIRT_START)
			
 
				+#define MACH2PHYS_VIRT_END    xen_mk_ulong(__MACH2PHYS_VIRT_END)
			
 
				+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
			
 
				+#ifndef machine_to_phys_mapping
			
 
				+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
			
 
				+ *  @which == SEGBASE_*  ;  @base == 64-bit base address
			
 
				+ * Returns 0 on success.
			
 
				+ */
			
 
				+#define SEGBASE_FS          0
			
 
				+#define SEGBASE_GS_USER     1
			
 
				+#define SEGBASE_GS_KERNEL   2
			
 
				+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
			
 
				+
			
 
				+/*
			
 
				+ * int HYPERVISOR_iret(void)
			
 
				+ * All arguments are on the kernel stack, in the following format.
			
 
				+ * Never returns if successful. Current kernel context is lost.
			
 
				+ * The saved CS is mapped as follows:
			
 
				+ *   RING0 -> RING3 kernel mode.
			
 
				+ *   RING1 -> RING3 kernel mode.
			
 
				+ *   RING2 -> RING3 kernel mode.
			
 
				+ *   RING3 -> RING3 user mode.
			
 
				+ * However RING0 indicates that the guest kernel should return to itself
			
 
				+ * directly with
			
 
				+ *      orb   $3,1*8(%rsp)
			
 
				+ *      iretq
			
 
				+ * If flags contains VGCF_in_syscall:
			
 
				+ *   Restore RAX, RIP, RFLAGS, RSP.
			
 
				+ *   Discard R11, RCX, CS, SS.
			
 
				+ * Otherwise:
			
 
				+ *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
			
 
				+ * All other registers are saved on hypercall entry and restored to user.
			
 
				+ */
			
 
				+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
			
 
				+#define _VGCF_in_syscall 8
			
 
				+#define VGCF_in_syscall  (1<<_VGCF_in_syscall)
			
 
				+#define VGCF_IN_SYSCALL  VGCF_in_syscall
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+
			
 
				+struct iret_context {
			
 
				+    /* Top of stack (%rsp at point of hypercall). */
			
 
				+    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
			
 
				+    /* Bottom of iret stack frame. */
			
 
				+};
			
 
				+
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+/* Anonymous unions include all permissible names (e.g., al/ah/ax/eax/rax). */
			
 
				+#define __DECL_REG_LOHI(which) union { \
			
 
				+    uint64_t r ## which ## x; \
			
 
				+    uint32_t e ## which ## x; \
			
 
				+    uint16_t which ## x; \
			
 
				+    struct { \
			
 
				+        uint8_t which ## l; \
			
 
				+        uint8_t which ## h; \
			
 
				+    }; \
			
 
				+}
			
 
				+#define __DECL_REG_LO8(name) union { \
			
 
				+    uint64_t r ## name; \
			
 
				+    uint32_t e ## name; \
			
 
				+    uint16_t name; \
			
 
				+    uint8_t name ## l; \
			
 
				+}
			
 
				+#define __DECL_REG_LO16(name) union { \
			
 
				+    uint64_t r ## name; \
			
 
				+    uint32_t e ## name; \
			
 
				+    uint16_t name; \
			
 
				+}
			
 
				+#define __DECL_REG_HI(num) union { \
			
 
				+    uint64_t r ## num; \
			
 
				+    uint32_t r ## num ## d; \
			
 
				+    uint16_t r ## num ## w; \
			
 
				+    uint8_t r ## num ## b; \
			
 
				+}
			
 
				+#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
			
 
				+/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
			
 
				+#define __DECL_REG(name) union { \
			
 
				+    uint64_t r ## name, e ## name; \
			
 
				+    uint32_t _e ## name; \
			
 
				+}
			
 
				+#else
			
 
				+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
			
 
				+#define __DECL_REG(name) uint64_t r ## name
			
 
				+#endif
			
 
				+
			
 
				+#ifndef __DECL_REG_LOHI
			
 
				+#define __DECL_REG_LOHI(name) __DECL_REG(name ## x)
			
 
				+#define __DECL_REG_LO8        __DECL_REG
			
 
				+#define __DECL_REG_LO16       __DECL_REG
			
 
				+#define __DECL_REG_HI(num)    uint64_t r ## num
			
 
				+#endif
			
 
				+
			
 
				+struct cpu_user_regs {
			
 
				+    __DECL_REG_HI(15);
			
 
				+    __DECL_REG_HI(14);
			
 
				+    __DECL_REG_HI(13);
			
 
				+    __DECL_REG_HI(12);
			
 
				+    __DECL_REG_LO8(bp);
			
 
				+    __DECL_REG_LOHI(b);
			
 
				+    __DECL_REG_HI(11);
			
 
				+    __DECL_REG_HI(10);
			
 
				+    __DECL_REG_HI(9);
			
 
				+    __DECL_REG_HI(8);
			
 
				+    __DECL_REG_LOHI(a);
			
 
				+    __DECL_REG_LOHI(c);
			
 
				+    __DECL_REG_LOHI(d);
			
 
				+    __DECL_REG_LO8(si);
			
 
				+    __DECL_REG_LO8(di);
			
 
				+    uint32_t error_code;    /* private */
			
 
				+    uint32_t entry_vector;  /* private */
			
 
				+    __DECL_REG_LO16(ip);
			
 
				+    uint16_t cs, _pad0[1];
			
 
				+    uint8_t  saved_upcall_mask;
			
 
				+    uint8_t  _pad1[3];
			
 
				+    __DECL_REG_LO16(flags); /* rflags.IF == !saved_upcall_mask */
			
 
				+    __DECL_REG_LO8(sp);
			
 
				+    uint16_t ss, _pad2[3];
			
 
				+    uint16_t es, _pad3[3];
			
 
				+    uint16_t ds, _pad4[3];
			
 
				+    uint16_t fs, _pad5[3];
			
 
				+    uint16_t gs, _pad6[3];
			
 
				+};
			
 
				+typedef struct cpu_user_regs cpu_user_regs_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
			
 
				+
			
 
				+#undef __DECL_REG
			
 
				+#undef __DECL_REG_LOHI
			
 
				+#undef __DECL_REG_LO8
			
 
				+#undef __DECL_REG_LO16
			
 
				+#undef __DECL_REG_HI
			
 
				+
			
 
				+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
			
 
				+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
			
 
				+
			
 
				+struct arch_vcpu_info {
			
 
				+    unsigned long cr2;
			
 
				+    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
			
 
				+};
			
 
				+typedef struct arch_vcpu_info arch_vcpu_info_t;
			
 
				+
			
 
				+typedef unsigned long xen_callback_t;
			
 
				+
			
 
				+#endif /* !__ASSEMBLY__ */
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/arch-x86/xen.h
+++ b/include/hw/xen/interface/arch-x86/xen.h
@@ -0,0 +1,398 @@
 
				+/******************************************************************************
			
 
				+ * arch-x86/xen.h
			
 
				+ *
			
 
				+ * Guest OS interface to x86 Xen.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2004-2006, K A Fraser
			
 
				+ */
			
 
				+
			
 
				+#include "../xen.h"
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
			
 
				+#define __XEN_PUBLIC_ARCH_X86_XEN_H__
			
 
				+
			
 
				+/* Structural guest handles introduced in 0x00030201. */
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
			
 
				+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
			
 
				+    typedef struct { type *p; } __guest_handle_ ## name
			
 
				+#else
			
 
				+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
			
 
				+    typedef type * __guest_handle_ ## name
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
			
 
				+ * in a struct in memory.
			
 
				+ * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
			
 
				+ * hypercall argument.
			
 
				+ * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
			
 
				+ * they might not be on other architectures.
			
 
				+ */
			
 
				+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
			
 
				+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
			
 
				+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
			
 
				+#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
			
 
				+#define __XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
			
 
				+#define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
			
 
				+#define XEN_GUEST_HANDLE_PARAM(name)    XEN_GUEST_HANDLE(name)
			
 
				+#define set_xen_guest_handle_raw(hnd, val)  do { (hnd).p = val; } while (0)
			
 
				+#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
			
 
				+
			
 
				+#if defined(__i386__)
			
 
				+# ifdef __XEN__
			
 
				+__DeFiNe__ __DECL_REG_LO8(which) uint32_t e ## which ## x
			
 
				+__DeFiNe__ __DECL_REG_LO16(name) union { uint32_t e ## name; }
			
 
				+# endif
			
 
				+#include "xen-x86_32.h"
			
 
				+# ifdef __XEN__
			
 
				+__UnDeF__ __DECL_REG_LO8
			
 
				+__UnDeF__ __DECL_REG_LO16
			
 
				+__DeFiNe__ __DECL_REG_LO8(which) e ## which ## x
			
 
				+__DeFiNe__ __DECL_REG_LO16(name) e ## name
			
 
				+# endif
			
 
				+#elif defined(__x86_64__)
			
 
				+#include "xen-x86_64.h"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+typedef unsigned long xen_pfn_t;
			
 
				+#define PRI_xen_pfn "lx"
			
 
				+#define PRIu_xen_pfn "lu"
			
 
				+#endif
			
 
				+
			
 
				+#define XEN_HAVE_PV_GUEST_ENTRY 1
			
 
				+
			
 
				+#define XEN_HAVE_PV_UPCALL_MASK 1
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 200 segdesc Segment Descriptor Tables
			
 
				+ */
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries);
			
 
				+ * `
			
 
				+ */
			
 
				+/*
			
 
				+ * A number of GDT entries are reserved by Xen. These are not situated at the
			
 
				+ * start of the GDT because some stupid OSes export hard-coded selector values
			
 
				+ * in their ABI. These hard-coded values are always near the start of the GDT,
			
 
				+ * so Xen places itself out of the way, at the far end of the GDT.
			
 
				+ *
			
 
				+ * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
			
 
				+ */
			
 
				+#define FIRST_RESERVED_GDT_PAGE  14
			
 
				+#define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
			
 
				+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc);
			
 
				+ * `
			
 
				+ * ` @pa   The machine physical address of the descriptor to
			
 
				+ * `       update. Must be either a descriptor page or writable.
			
 
				+ * ` @desc The descriptor value to update, in the same format as a
			
 
				+ * `       native descriptor table entry.
			
 
				+ */
			
 
				+
			
 
				+/* Maximum number of virtual CPUs in legacy multi-processor guests. */
			
 
				+#define XEN_LEGACY_MAX_VCPUS 32
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+
			
 
				+typedef unsigned long xen_ulong_t;
			
 
				+#define PRI_xen_ulong "lx"
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp);
			
 
				+ * `
			
 
				+ * Sets the stack segment and pointer for the current vcpu.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]);
			
 
				+ * `
			
 
				+ */
			
 
				+/*
			
 
				+ * Send an array of these to HYPERVISOR_set_trap_table().
			
 
				+ * Terminate the array with a sentinel entry, with traps[].address==0.
			
 
				+ * The privilege level specifies which modes may enter a trap via a software
			
 
				+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
			
 
				+ * privilege levels as follows:
			
 
				+ *  Level == 0: No one may enter
			
 
				+ *  Level == 1: Kernel may enter
			
 
				+ *  Level == 2: Kernel may enter
			
 
				+ *  Level == 3: Everyone may enter
			
 
				+ *
			
 
				+ * Note: For compatibility with kernels not setting up exception handlers
			
 
				+ *       early enough, Xen will avoid trying to inject #GP (and hence crash
			
 
				+ *       the domain) when an RDMSR would require this, but no handler was
			
 
				+ *       set yet. The precise conditions are implementation specific, and
			
 
				+ *       new code may not rely on such behavior anyway.
			
 
				+ */
			
 
				+#define TI_GET_DPL(_ti)      ((_ti)->flags & 3)
			
 
				+#define TI_GET_IF(_ti)       ((_ti)->flags & 4)
			
 
				+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
			
 
				+#define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
			
 
				+struct trap_info {
			
 
				+    uint8_t       vector;  /* exception vector                              */
			
 
				+    uint8_t       flags;   /* bits 0-1: DPL; bit 2: clear event enable?     */
			
 
				+    uint16_t      cs;      /* code selector                                 */
			
 
				+    unsigned long address; /* code offset                                   */
			
 
				+};
			
 
				+typedef struct trap_info trap_info_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(trap_info_t);
			
 
				+
			
 
				+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
			
 
				+
			
 
				+/*
			
 
				+ * The following is all CPU context. Note that the fpu_ctxt block is filled
			
 
				+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
			
 
				+ *
			
 
				+ * Also note that when calling DOMCTL_setvcpucontext for HVM guests, not all
			
 
				+ * information in this structure is updated, the fields read include: fpu_ctxt
			
 
				+ * (if VGCF_I387_VALID is set), flags, user_regs and debugreg[*].
			
 
				+ *
			
 
				+ * Note: VCPUOP_initialise for HVM guests is non-symmetric with
			
 
				+ * DOMCTL_setvcpucontext, and uses struct vcpu_hvm_context from hvm/hvm_vcpu.h
			
 
				+ */
			
 
				+struct vcpu_guest_context {
			
 
				+    /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
			
 
				+    struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
			
 
				+#define VGCF_I387_VALID                (1<<0)
			
 
				+#define VGCF_IN_KERNEL                 (1<<2)
			
 
				+#define _VGCF_i387_valid               0
			
 
				+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
			
 
				+#define _VGCF_in_kernel                2
			
 
				+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
			
 
				+#define _VGCF_failsafe_disables_events 3
			
 
				+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
			
 
				+#define _VGCF_syscall_disables_events  4
			
 
				+#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
			
 
				+#define _VGCF_online                   5
			
 
				+#define VGCF_online                    (1<<_VGCF_online)
			
 
				+    unsigned long flags;                    /* VGCF_* flags                 */
			
 
				+    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
			
 
				+    struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
			
 
				+    unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
			
 
				+    unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
			
 
				+    unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
			
 
				+    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
			
 
				+    unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
			
 
				+    unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
			
 
				+#ifdef __i386__
			
 
				+    unsigned long event_callback_cs;        /* CS:EIP of event callback     */
			
 
				+    unsigned long event_callback_eip;
			
 
				+    unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
			
 
				+    unsigned long failsafe_callback_eip;
			
 
				+#else
			
 
				+    unsigned long event_callback_eip;
			
 
				+    unsigned long failsafe_callback_eip;
			
 
				+#ifdef __XEN__
			
 
				+    union {
			
 
				+        unsigned long syscall_callback_eip;
			
 
				+        struct {
			
 
				+            unsigned int event_callback_cs;    /* compat CS of event cb     */
			
 
				+            unsigned int failsafe_callback_cs; /* compat CS of failsafe cb  */
			
 
				+        };
			
 
				+    };
			
 
				+#else
			
 
				+    unsigned long syscall_callback_eip;
			
 
				+#endif
			
 
				+#endif
			
 
				+    unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
			
 
				+#ifdef __x86_64__
			
 
				+    /* Segment base addresses. */
			
 
				+    uint64_t      fs_base;
			
 
				+    uint64_t      gs_base_kernel;
			
 
				+    uint64_t      gs_base_user;
			
 
				+#endif
			
 
				+};
			
 
				+typedef struct vcpu_guest_context vcpu_guest_context_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
			
 
				+
			
 
				+struct arch_shared_info {
			
 
				+    /*
			
 
				+     * Number of valid entries in the p2m table(s) anchored at
			
 
				+     * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
			
 
				+     */
			
 
				+    unsigned long max_pfn;
			
 
				+    /*
			
 
				+     * Frame containing list of mfns containing list of mfns containing p2m.
			
 
				+     * A value of 0 indicates it has not yet been set up, ~0 indicates it has
			
 
				+     * been set to invalid e.g. due to the p2m being too large for the 3-level
			
 
				+     * p2m tree. In this case the linear mapper p2m list anchored at p2m_vaddr
			
 
				+     * is to be used.
			
 
				+     */
			
 
				+    xen_pfn_t     pfn_to_mfn_frame_list_list;
			
 
				+    unsigned long nmi_reason;
			
 
				+    /*
			
 
				+     * Following three fields are valid if p2m_cr3 contains a value different
			
 
				+     * from 0.
			
 
				+     * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
			
 
				+     * p2m_cr3 is in the same format as a cr3 value in the vcpu register state
			
 
				+     * and holds the folded machine frame number (via xen_pfn_to_cr3) of a
			
 
				+     * L3 or L4 page table.
			
 
				+     * p2m_vaddr holds the virtual address of the linear p2m list. All entries
			
 
				+     * in the range [0...max_pfn[ are accessible via this pointer.
			
 
				+     * p2m_generation will be incremented by the guest before and after each
			
 
				+     * change of the mappings of the p2m list. p2m_generation starts at 0 and
			
 
				+     * a value with the least significant bit set indicates that a mapping
			
 
				+     * update is in progress. This allows guest external software (e.g. in Dom0)
			
 
				+     * to verify that read mappings are consistent and whether they have changed
			
 
				+     * since the last check.
			
 
				+     * Modifying a p2m element in the linear p2m list is allowed via an atomic
			
 
				+     * write only.
			
 
				+     */
			
 
				+    unsigned long p2m_cr3;         /* cr3 value of the p2m address space */
			
 
				+    unsigned long p2m_vaddr;       /* virtual address of the p2m list */
			
 
				+    unsigned long p2m_generation;  /* generation count of p2m mapping */
			
 
				+#ifdef __i386__
			
 
				+    /* There's no room for this field in the generic structure. */
			
 
				+    uint32_t wc_sec_hi;
			
 
				+#endif
			
 
				+};
			
 
				+typedef struct arch_shared_info arch_shared_info_t;
			
 
				+
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+/*
			
 
				+ * struct xen_arch_domainconfig's ABI is covered by
			
 
				+ * XEN_DOMCTL_INTERFACE_VERSION.
			
 
				+ */
			
 
				+struct xen_arch_domainconfig {
			
 
				+#define _XEN_X86_EMU_LAPIC          0
			
 
				+#define XEN_X86_EMU_LAPIC           (1U<<_XEN_X86_EMU_LAPIC)
			
 
				+#define _XEN_X86_EMU_HPET           1
			
 
				+#define XEN_X86_EMU_HPET            (1U<<_XEN_X86_EMU_HPET)
			
 
				+#define _XEN_X86_EMU_PM             2
			
 
				+#define XEN_X86_EMU_PM              (1U<<_XEN_X86_EMU_PM)
			
 
				+#define _XEN_X86_EMU_RTC            3
			
 
				+#define XEN_X86_EMU_RTC             (1U<<_XEN_X86_EMU_RTC)
			
 
				+#define _XEN_X86_EMU_IOAPIC         4
			
 
				+#define XEN_X86_EMU_IOAPIC          (1U<<_XEN_X86_EMU_IOAPIC)
			
 
				+#define _XEN_X86_EMU_PIC            5
			
 
				+#define XEN_X86_EMU_PIC             (1U<<_XEN_X86_EMU_PIC)
			
 
				+#define _XEN_X86_EMU_VGA            6
			
 
				+#define XEN_X86_EMU_VGA             (1U<<_XEN_X86_EMU_VGA)
			
 
				+#define _XEN_X86_EMU_IOMMU          7
			
 
				+#define XEN_X86_EMU_IOMMU           (1U<<_XEN_X86_EMU_IOMMU)
			
 
				+#define _XEN_X86_EMU_PIT            8
			
 
				+#define XEN_X86_EMU_PIT             (1U<<_XEN_X86_EMU_PIT)
			
 
				+#define _XEN_X86_EMU_USE_PIRQ       9
			
 
				+#define XEN_X86_EMU_USE_PIRQ        (1U<<_XEN_X86_EMU_USE_PIRQ)
			
 
				+#define _XEN_X86_EMU_VPCI           10
			
 
				+#define XEN_X86_EMU_VPCI            (1U<<_XEN_X86_EMU_VPCI)
			
 
				+
			
 
				+#define XEN_X86_EMU_ALL             (XEN_X86_EMU_LAPIC | XEN_X86_EMU_HPET |  \
			
 
				+                                     XEN_X86_EMU_PM | XEN_X86_EMU_RTC |      \
			
 
				+                                     XEN_X86_EMU_IOAPIC | XEN_X86_EMU_PIC |  \
			
 
				+                                     XEN_X86_EMU_VGA | XEN_X86_EMU_IOMMU |   \
			
 
				+                                     XEN_X86_EMU_PIT | XEN_X86_EMU_USE_PIRQ |\
			
 
				+                                     XEN_X86_EMU_VPCI)
			
 
				+    uint32_t emulation_flags;
			
 
				+
			
 
				+/*
			
 
				+ * Select whether to use a relaxed behavior for accesses to MSRs not explicitly
			
 
				+ * handled by Xen instead of injecting a #GP to the guest. Note this option
			
 
				+ * doesn't allow the guest to read or write to the underlying MSR.
			
 
				+ */
			
 
				+#define XEN_X86_MSR_RELAXED (1u << 0)
			
 
				+    uint32_t misc_flags;
			
 
				+};
			
 
				+
			
 
				+/* Location of online VCPU bitmap. */
			
 
				+#define XEN_ACPI_CPU_MAP             0xaf00
			
 
				+#define XEN_ACPI_CPU_MAP_LEN         ((HVM_MAX_VCPUS + 7) / 8)
			
 
				+
			
 
				+/* GPE0 bit set during CPU hotplug */
			
 
				+#define XEN_ACPI_GPE0_CPUHP_BIT      2
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Representations of architectural CPUID and MSR information.  Used as the
			
 
				+ * serialised version of Xen's internal representation.
			
 
				+ */
			
 
				+typedef struct xen_cpuid_leaf {
			
 
				+#define XEN_CPUID_NO_SUBLEAF 0xffffffffu
			
 
				+    uint32_t leaf, subleaf;
			
 
				+    uint32_t a, b, c, d;
			
 
				+} xen_cpuid_leaf_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_cpuid_leaf_t);
			
 
				+
			
 
				+typedef struct xen_msr_entry {
			
 
				+    uint32_t idx;
			
 
				+    uint32_t flags; /* Reserved MBZ. */
			
 
				+    uint64_t val;
			
 
				+} xen_msr_entry_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_msr_entry_t);
			
 
				+
			
 
				+#endif /* !__ASSEMBLY__ */
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_fpu_taskswitch(int set);
			
 
				+ * `
			
 
				+ * Sets (if set!=0) or clears (if set==0) CR0.TS.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_set_debugreg(int regno, unsigned long value);
			
 
				+ *
			
 
				+ * ` unsigned long
			
 
				+ * ` HYPERVISOR_get_debugreg(int regno);
			
 
				+ * For 0<=reg<=7, returns the debug register value.
			
 
				+ * For other values of reg, returns ((unsigned long)-EINVAL).
			
 
				+ * (Unfortunately, this interface is defective.)
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Prefix forces emulation of some non-trapping instructions.
			
 
				+ * Currently only CPUID.
			
 
				+ */
			
 
				+#ifdef __ASSEMBLY__
			
 
				+#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
			
 
				+#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
			
 
				+#else
			
 
				+#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
			
 
				+#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Debug console IO port, also called "port E9 hack". Each character written
			
 
				+ * to this IO port will be printed on the hypervisor console, subject to log
			
 
				+ * level restrictions.
			
 
				+ */
			
 
				+#define XEN_HVM_DEBUGCONS_IOPORT 0xe9
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/event_channel.h
+++ b/include/hw/xen/interface/event_channel.h
@@ -0,0 +1,388 @@
 
				+/******************************************************************************
			
 
				+ * event_channel.h
			
 
				+ *
			
 
				+ * Event channels between domains.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2003-2004, K A Fraser.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
			
 
				+#define __XEN_PUBLIC_EVENT_CHANNEL_H__
			
 
				+
			
 
				+#include "xen.h"
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 150 evtchn Event Channels
			
 
				+ *
			
 
				+ * Event channels are the basic primitive provided by Xen for event
			
 
				+ * notifications. An event is the Xen equivalent of a hardware
			
 
				+ * interrupt. They essentially store one bit of information; the event
			
 
				+ * of interest is signalled by transitioning this bit from 0 to 1.
			
 
				+ *
			
 
				+ * Notifications are received by a guest via an upcall from Xen,
			
 
				+ * indicating when an event arrives (setting the bit). Further
			
 
				+ * notifications are masked until the bit is cleared again (therefore,
			
 
				+ * guests must check the value of the bit after re-enabling event
			
 
				+ * delivery to ensure no missed notifications).
			
 
				+ *
			
 
				+ * Event notifications can be masked by setting a flag; this is
			
 
				+ * equivalent to disabling interrupts and can be used to ensure
			
 
				+ * atomicity of certain operations in the guest kernel.
			
 
				+ *
			
 
				+ * Event channels are represented by the evtchn_* fields in
			
 
				+ * struct shared_info and struct vcpu_info.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args)
			
 
				+ * `
			
 
				+ * @cmd  == EVTCHNOP_* (event-channel operation).
			
 
				+ * @args == struct evtchn_* Operation-specific extra arguments (NULL if none).
			
 
				+ */
			
 
				+
			
 
				+/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */
			
 
				+#define EVTCHNOP_bind_interdomain 0
			
 
				+#define EVTCHNOP_bind_virq        1
			
 
				+#define EVTCHNOP_bind_pirq        2
			
 
				+#define EVTCHNOP_close            3
			
 
				+#define EVTCHNOP_send             4
			
 
				+#define EVTCHNOP_status           5
			
 
				+#define EVTCHNOP_alloc_unbound    6
			
 
				+#define EVTCHNOP_bind_ipi         7
			
 
				+#define EVTCHNOP_bind_vcpu        8
			
 
				+#define EVTCHNOP_unmask           9
			
 
				+#define EVTCHNOP_reset           10
			
 
				+#define EVTCHNOP_init_control    11
			
 
				+#define EVTCHNOP_expand_array    12
			
 
				+#define EVTCHNOP_set_priority    13
			
 
				+#ifdef __XEN__
			
 
				+#define EVTCHNOP_reset_cont      14
			
 
				+#endif
			
 
				+/* ` } */
			
 
				+
			
 
				+typedef uint32_t evtchn_port_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
			
 
				+ * accepting interdomain bindings from domain <remote_dom>. A fresh port
			
 
				+ * is allocated in <dom> and returned as <port>.
			
 
				+ * NOTES:
			
 
				+ *  1. If the caller is unprivileged then <dom> must be DOMID_SELF.
			
 
				+ *  2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
			
 
				+ */
			
 
				+struct evtchn_alloc_unbound {
			
 
				+    /* IN parameters */
			
 
				+    domid_t dom, remote_dom;
			
 
				+    /* OUT parameters */
			
 
				+    evtchn_port_t port;
			
 
				+};
			
 
				+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
			
 
				+ * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
			
 
				+ * a port that is unbound and marked as accepting bindings from the calling
			
 
				+ * domain. A fresh port is allocated in the calling domain and returned as
			
 
				+ * <local_port>.
			
 
				+ *
			
 
				+ * In case the peer domain has already tried to set our event channel
			
 
				+ * pending, before it was bound, EVTCHNOP_bind_interdomain always sets
			
 
				+ * the local event channel pending.
			
 
				+ *
			
 
				+ * The usual pattern of use, in the guest's upcall (or subsequent
			
 
				+ * handler) is as follows: (Re-enable the event channel for subsequent
			
 
				+ * signalling and then) check for the existence of whatever condition
			
 
				+ * is being waited for by other means, and take whatever action is
			
 
				+ * needed (if any).
			
 
				+ *
			
 
				+ * NOTES:
			
 
				+ *  1. <remote_dom> may be DOMID_SELF, allowing loopback connections.
			
 
				+ */
			
 
				+struct evtchn_bind_interdomain {
			
 
				+    /* IN parameters. */
			
 
				+    domid_t remote_dom;
			
 
				+    evtchn_port_t remote_port;
			
 
				+    /* OUT parameters. */
			
 
				+    evtchn_port_t local_port;
			
 
				+};
			
 
				+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
			
 
				+ * vcpu.
			
 
				+ * NOTES:
			
 
				+ *  1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list
			
 
				+ *     in xen.h for the classification of each VIRQ.
			
 
				+ *  2. Global VIRQs must be allocated on VCPU0 but can subsequently be
			
 
				+ *     re-bound via EVTCHNOP_bind_vcpu.
			
 
				+ *  3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu.
			
 
				+ *     The allocated event channel is bound to the specified vcpu and the
			
 
				+ *     binding cannot be changed.
			
 
				+ */
			
 
				+struct evtchn_bind_virq {
			
 
				+    /* IN parameters. */
			
 
				+    uint32_t virq; /* enum virq */
			
 
				+    uint32_t vcpu;
			
 
				+    /* OUT parameters. */
			
 
				+    evtchn_port_t port;
			
 
				+};
			
 
				+typedef struct evtchn_bind_virq evtchn_bind_virq_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ <irq>).
			
 
				+ * NOTES:
			
 
				+ *  1. A physical IRQ may be bound to at most one event channel per domain.
			
 
				+ *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
			
 
				+ */
			
 
				+struct evtchn_bind_pirq {
			
 
				+    /* IN parameters. */
			
 
				+    uint32_t pirq;
			
 
				+#define BIND_PIRQ__WILL_SHARE 1
			
 
				+    uint32_t flags; /* BIND_PIRQ__* */
			
 
				+    /* OUT parameters. */
			
 
				+    evtchn_port_t port;
			
 
				+};
			
 
				+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
			
 
				+ * NOTES:
			
 
				+ *  1. The allocated event channel is bound to the specified vcpu. The binding
			
 
				+ *     may not be changed.
			
 
				+ */
			
 
				+struct evtchn_bind_ipi {
			
 
				+    uint32_t vcpu;
			
 
				+    /* OUT parameters. */
			
 
				+    evtchn_port_t port;
			
 
				+};
			
 
				+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_close: Close a local event channel <port>. If the channel is
			
 
				+ * interdomain then the remote end is placed in the unbound state
			
 
				+ * (EVTCHNSTAT_unbound), awaiting a new connection.
			
 
				+ */
			
 
				+struct evtchn_close {
			
 
				+    /* IN parameters. */
			
 
				+    evtchn_port_t port;
			
 
				+};
			
 
				+typedef struct evtchn_close evtchn_close_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_send: Send an event to the remote end of the channel whose local
			
 
				+ * endpoint is <port>.
			
 
				+ */
			
 
				+struct evtchn_send {
			
 
				+    /* IN parameters. */
			
 
				+    evtchn_port_t port;
			
 
				+};
			
 
				+typedef struct evtchn_send evtchn_send_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_status: Get the current status of the communication channel which
			
 
				+ * has an endpoint at <dom, port>.
			
 
				+ * NOTES:
			
 
				+ *  1. <dom> may be specified as DOMID_SELF.
			
 
				+ *  2. Only a sufficiently-privileged domain may obtain the status of an event
			
 
				+ *     channel for which <dom> is not DOMID_SELF.
			
 
				+ */
			
 
				+struct evtchn_status {
			
 
				+    /* IN parameters */
			
 
				+    domid_t  dom;
			
 
				+    evtchn_port_t port;
			
 
				+    /* OUT parameters */
			
 
				+#define EVTCHNSTAT_closed       0  /* Channel is not in use.                 */
			
 
				+#define EVTCHNSTAT_unbound      1  /* Channel is waiting interdom connection.*/
			
 
				+#define EVTCHNSTAT_interdomain  2  /* Channel is connected to remote domain. */
			
 
				+#define EVTCHNSTAT_pirq         3  /* Channel is bound to a phys IRQ line.   */
			
 
				+#define EVTCHNSTAT_virq         4  /* Channel is bound to a virtual IRQ line */
			
 
				+#define EVTCHNSTAT_ipi          5  /* Channel is bound to a virtual IPI line */
			
 
				+    uint32_t status;
			
 
				+    uint32_t vcpu;                 /* VCPU to which this channel is bound.   */
			
 
				+    union {
			
 
				+        struct {
			
 
				+            domid_t dom;
			
 
				+        } unbound;                 /* EVTCHNSTAT_unbound */
			
 
				+        struct {
			
 
				+            domid_t dom;
			
 
				+            evtchn_port_t port;
			
 
				+        } interdomain;             /* EVTCHNSTAT_interdomain */
			
 
				+        uint32_t pirq;             /* EVTCHNSTAT_pirq        */
			
 
				+        uint32_t virq;             /* EVTCHNSTAT_virq        */
			
 
				+    } u;
			
 
				+};
			
 
				+typedef struct evtchn_status evtchn_status_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
			
 
				+ * event is pending.
			
 
				+ * NOTES:
			
 
				+ *  1. IPI-bound channels always notify the vcpu specified at bind time.
			
 
				+ *     This binding cannot be changed.
			
 
				+ *  2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time.
			
 
				+ *     This binding cannot be changed.
			
 
				+ *  3. All other channels notify vcpu0 by default. This default is set when
			
 
				+ *     the channel is allocated (a port that is freed and subsequently reused
			
 
				+ *     has its binding reset to vcpu0).
			
 
				+ */
			
 
				+struct evtchn_bind_vcpu {
			
 
				+    /* IN parameters. */
			
 
				+    evtchn_port_t port;
			
 
				+    uint32_t vcpu;
			
 
				+};
			
 
				+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
			
 
				+ * a notification to the appropriate VCPU if an event is pending.
			
 
				+ */
			
 
				+struct evtchn_unmask {
			
 
				+    /* IN parameters. */
			
 
				+    evtchn_port_t port;
			
 
				+};
			
 
				+typedef struct evtchn_unmask evtchn_unmask_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
			
 
				+ * NOTES:
			
 
				+ *  1. <dom> may be specified as DOMID_SELF.
			
 
				+ *  2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
			
 
				+ *  3. Destroys all control blocks and event array, resets event channel
			
 
				+ *     operations to 2-level ABI if called with <dom> == DOMID_SELF and FIFO
			
 
				+ *     ABI was used. Guests should not bind events during EVTCHNOP_reset call
			
 
				+ *     as these events are likely to be lost.
			
 
				+ */
			
 
				+struct evtchn_reset {
			
 
				+    /* IN parameters. */
			
 
				+    domid_t dom;
			
 
				+};
			
 
				+typedef struct evtchn_reset evtchn_reset_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_init_control: initialize the control block for the FIFO ABI.
			
 
				+ *
			
 
				+ * Note: any events that are currently pending will not be resent and
			
 
				+ * will be lost.  Guests should call this before binding any event to
			
 
				+ * avoid losing any events.
			
 
				+ */
			
 
				+struct evtchn_init_control {
			
 
				+    /* IN parameters. */
			
 
				+    uint64_t control_gfn;
			
 
				+    uint32_t offset;
			
 
				+    uint32_t vcpu;
			
 
				+    /* OUT parameters. */
			
 
				+    uint8_t link_bits;
			
 
				+    uint8_t _pad[7];
			
 
				+};
			
 
				+typedef struct evtchn_init_control evtchn_init_control_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_expand_array: add an additional page to the event array.
			
 
				+ */
			
 
				+struct evtchn_expand_array {
			
 
				+    /* IN parameters. */
			
 
				+    uint64_t array_gfn;
			
 
				+};
			
 
				+typedef struct evtchn_expand_array evtchn_expand_array_t;
			
 
				+
			
 
				+/*
			
 
				+ * EVTCHNOP_set_priority: set the priority for an event channel.
			
 
				+ */
			
 
				+struct evtchn_set_priority {
			
 
				+    /* IN parameters. */
			
 
				+    evtchn_port_t port;
			
 
				+    uint32_t priority;
			
 
				+};
			
 
				+typedef struct evtchn_set_priority evtchn_set_priority_t;
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op)
			
 
				+ * `
			
 
				+ * Superseded by the new event_channel_op() hypercall since 0x00030202.
			
 
				+ */
			
 
				+struct evtchn_op {
			
 
				+    uint32_t cmd; /* enum event_channel_op */
			
 
				+    union {
			
 
				+        evtchn_alloc_unbound_t    alloc_unbound;
			
 
				+        evtchn_bind_interdomain_t bind_interdomain;
			
 
				+        evtchn_bind_virq_t        bind_virq;
			
 
				+        evtchn_bind_pirq_t        bind_pirq;
			
 
				+        evtchn_bind_ipi_t         bind_ipi;
			
 
				+        evtchn_close_t            close;
			
 
				+        evtchn_send_t             send;
			
 
				+        evtchn_status_t           status;
			
 
				+        evtchn_bind_vcpu_t        bind_vcpu;
			
 
				+        evtchn_unmask_t           unmask;
			
 
				+    } u;
			
 
				+};
			
 
				+typedef struct evtchn_op evtchn_op_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
			
 
				+
			
 
				+/*
			
 
				+ * 2-level ABI
			
 
				+ */
			
 
				+
			
 
				+#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
			
 
				+
			
 
				+/*
			
 
				+ * FIFO ABI
			
 
				+ */
			
 
				+
			
 
				+/* Events may have priorities from 0 (highest) to 15 (lowest). */
			
 
				+#define EVTCHN_FIFO_PRIORITY_MAX     0
			
 
				+#define EVTCHN_FIFO_PRIORITY_DEFAULT 7
			
 
				+#define EVTCHN_FIFO_PRIORITY_MIN     15
			
 
				+
			
 
				+#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1)
			
 
				+
			
 
				+typedef uint32_t event_word_t;
			
 
				+
			
 
				+#define EVTCHN_FIFO_PENDING 31
			
 
				+#define EVTCHN_FIFO_MASKED  30
			
 
				+#define EVTCHN_FIFO_LINKED  29
			
 
				+#define EVTCHN_FIFO_BUSY    28
			
 
				+
			
 
				+#define EVTCHN_FIFO_LINK_BITS 17
			
 
				+#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1)
			
 
				+
			
 
				+#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS)
			
 
				+
			
 
				+struct evtchn_fifo_control_block {
			
 
				+    uint32_t ready;
			
 
				+    uint32_t _rsvd;
			
 
				+    uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
			
 
				+};
			
 
				+typedef struct evtchn_fifo_control_block evtchn_fifo_control_block_t;
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/features.h
+++ b/include/hw/xen/interface/features.h
@@ -0,0 +1,143 @@
 
				+/******************************************************************************
			
 
				+ * features.h
			
 
				+ *
			
 
				+ * Feature flags, reported by XENVER_get_features.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2006, Keir Fraser <keir@xensource.com>
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_FEATURES_H__
			
 
				+#define __XEN_PUBLIC_FEATURES_H__
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 200 elfnotes_features XEN_ELFNOTE_FEATURES
			
 
				+ *
			
 
				+ * The list of all the features the guest supports. They are set by
			
 
				+ * parsing the XEN_ELFNOTE_FEATURES and XEN_ELFNOTE_SUPPORTED_FEATURES
			
 
				+ * string. The format is the feature names (as given here without the
			
 
				+ * "XENFEAT_" prefix) separated by '|' characters.
			
 
				+ * If a feature is required for the kernel to function then the feature name
			
 
				+ * must be preceded by a '!' character.
			
 
				+ *
			
 
				+ * Note that if XEN_ELFNOTE_SUPPORTED_FEATURES is used, then
			
 
				+ * XENFEAT_dom0 MUST be set if the guest is to be booted as dom0.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * If set, the guest does not need to write-protect its pagetables, and can
			
 
				+ * update them via direct writes.
			
 
				+ */
			
 
				+#define XENFEAT_writable_page_tables       0
			
 
				+
			
 
				+/*
			
 
				+ * If set, the guest does not need to write-protect its segment descriptor
			
 
				+ * tables, and can update them via direct writes.
			
 
				+ */
			
 
				+#define XENFEAT_writable_descriptor_tables 1
			
 
				+
			
 
				+/*
			
 
				+ * If set, translation between the guest's 'pseudo-physical' address space
			
 
				+ * and the host's machine address space are handled by the hypervisor. In this
			
 
				+ * mode the guest does not need to perform phys-to/from-machine translations
			
 
				+ * when performing page table operations.
			
 
				+ */
			
 
				+#define XENFEAT_auto_translated_physmap    2
			
 
				+
			
 
				+/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */
			
 
				+#define XENFEAT_supervisor_mode_kernel     3
			
 
				+
			
 
				+/*
			
 
				+ * If set, the guest does not need to allocate x86 PAE page directories
			
 
				+ * below 4GB. This flag is usually implied by auto_translated_physmap.
			
 
				+ */
			
 
				+#define XENFEAT_pae_pgdir_above_4gb        4
			
 
				+
			
 
				+/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
			
 
				+#define XENFEAT_mmu_pt_update_preserve_ad  5
			
 
				+
			
 
				+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
			
 
				+#define XENFEAT_highmem_assist             6
			
 
				+
			
 
				+/*
			
 
				+ * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
			
 
				+ * available pte bits.
			
 
				+ */
			
 
				+#define XENFEAT_gnttab_map_avail_bits      7
			
 
				+
			
 
				+/* x86: Does this Xen host support the HVM callback vector type? */
			
 
				+#define XENFEAT_hvm_callback_vector        8
			
 
				+
			
 
				+/* x86: pvclock algorithm is safe to use on HVM */
			
 
				+#define XENFEAT_hvm_safe_pvclock           9
			
 
				+
			
 
				+/* x86: pirq can be used by HVM guests */
			
 
				+#define XENFEAT_hvm_pirqs                 10
			
 
				+
			
 
				+/* operation as Dom0 is supported */
			
 
				+#define XENFEAT_dom0                      11
			
 
				+
			
 
				+/* Xen also maps grant references at pfn = mfn.
			
 
				+ * This feature flag is deprecated and should not be used.
			
 
				+#define XENFEAT_grant_map_identity        12
			
 
				+ */
			
 
				+
			
 
				+/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */
			
 
				+#define XENFEAT_memory_op_vnode_supported 13
			
 
				+
			
 
				+/* arm: Hypervisor supports ARM SMC calling convention. */
			
 
				+#define XENFEAT_ARM_SMCCC_supported       14
			
 
				+
			
 
				+/*
			
 
				+ * x86/PVH: If set, ACPI RSDP can be placed at any address. Otherwise RSDP
			
 
				+ * must be located in lower 1MB, as required by ACPI Specification for IA-PC
			
 
				+ * systems.
			
 
				+ * This feature flag is only consulted if XEN_ELFNOTE_GUEST_OS contains
			
 
				+ * the "linux" string.
			
 
				+ */
			
 
				+#define XENFEAT_linux_rsdp_unrestricted   15
			
 
				+
			
 
				+/*
			
 
				+ * A direct-mapped (or 1:1 mapped) domain is a domain for which its
			
 
				+ * local pages have gfn == mfn. If a domain is direct-mapped,
			
 
				+ * XENFEAT_direct_mapped is set; otherwise XENFEAT_not_direct_mapped
			
 
				+ * is set.
			
 
				+ *
			
 
				+ * If neither flag is set (e.g. older Xen releases) the assumptions are:
			
 
				+ * - not auto_translated domains (x86 only) are always direct-mapped
			
 
				+ * - on x86, auto_translated domains are not direct-mapped
			
 
				+ * - on ARM, Dom0 is direct-mapped, DomUs are not
			
 
				+ */
			
 
				+#define XENFEAT_not_direct_mapped         16
			
 
				+#define XENFEAT_direct_mapped             17
			
 
				+
			
 
				+#define XENFEAT_NR_SUBMAPS 1
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_FEATURES_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/grant_table.h
+++ b/include/hw/xen/interface/grant_table.h
@@ -28,9 +28,659 @@
 
				 #ifndef __XEN_PUBLIC_GRANT_TABLE_H__
			
 
				 #define __XEN_PUBLIC_GRANT_TABLE_H__
			
 
				 
			
 
				+#include "xen.h"
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 150 gnttab Grant Tables
			
 
				+ *
			
 
				+ * Xen's grant tables provide a generic mechanism for memory sharing
			
 
				+ * between domains. This shared memory interface underpins the split
			
 
				+ * device drivers for block and network IO.
			
 
				+ *
			
 
				+ * Each domain has its own grant table. This is a data structure that
			
 
				+ * is shared with Xen; it allows the domain to tell Xen what kind of
			
 
				+ * permissions other domains have on its pages. Entries in the grant
			
 
				+ * table are identified by grant references. A grant reference is an
			
 
				+ * integer, which indexes into the grant table. It acts as a
			
 
				+ * capability which the grantee can use to perform operations on the
			
 
				+ * granter's memory.
			
 
				+ *
			
 
				+ * This capability-based system allows shared-memory communications
			
 
				+ * between unprivileged domains. A grant reference also encapsulates
			
 
				+ * the details of a shared page, removing the need for a domain to
			
 
				+ * know the real machine address of a page it is sharing. This makes
			
 
				+ * it possible to share memory correctly with domains running in
			
 
				+ * fully virtualised memory.
			
 
				+ */
			
 
				+
			
 
				+/***********************************
			
 
				+ * GRANT TABLE REPRESENTATION
			
 
				+ */
			
 
				+
			
 
				+/* Some rough guidelines on accessing and updating grant-table entries
			
 
				+ * in a concurrency-safe manner. For more information, Linux contains a
			
 
				+ * reference implementation for guest OSes (drivers/xen/grant-table.c); see
			
 
				+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD
			
 
				+ *
			
 
				+ * NB. WMB is a no-op on current-generation x86 processors. However, a
			
 
				+ *     compiler barrier will still be required.
			
 
				+ *
			
 
				+ * Introducing a valid entry into the grant table:
			
 
				+ *  1. Write ent->domid.
			
 
				+ *  2. Write ent->frame:
			
 
				+ *      GTF_permit_access:   Frame to which access is permitted.
			
 
				+ *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
			
 
				+ *                           frame, or zero if none.
			
 
				+ *  3. Write memory barrier (WMB).
			
 
				+ *  4. Write ent->flags, inc. valid type.
			
 
				+ *
			
 
				+ * Invalidating an unused GTF_permit_access entry:
			
 
				+ *  1. flags = ent->flags.
			
 
				+ *  2. Observe that !(flags & (GTF_reading|GTF_writing)).
			
 
				+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
			
 
				+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
			
 
				+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
			
 
				+ *
			
 
				+ * Invalidating an in-use GTF_permit_access entry:
			
 
				+ *  This cannot be done directly. Request assistance from the domain controller
			
 
				+ *  which can set a timeout on the use of a grant entry and take necessary
			
 
				+ *  action. (NB. This is not yet implemented!).
			
 
				+ *
			
 
				+ * Invalidating an unused GTF_accept_transfer entry:
			
 
				+ *  1. flags = ent->flags.
			
 
				+ *  2. Observe that !(flags & GTF_transfer_committed). [*]
			
 
				+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
			
 
				+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
			
 
				+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
			
 
				+ *  [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
			
 
				+ *      The guest must /not/ modify the grant entry until the address of the
			
 
				+ *      transferred frame is written. It is safe for the guest to spin waiting
			
 
				+ *      for this to occur (detect by observing GTF_transfer_completed in
			
 
				+ *      ent->flags).
			
 
				+ *
			
 
				+ * Invalidating a committed GTF_accept_transfer entry:
			
 
				+ *  1. Wait for (ent->flags & GTF_transfer_completed).
			
 
				+ *
			
 
				+ * Changing a GTF_permit_access from writable to read-only:
			
 
				+ *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
			
 
				+ *
			
 
				+ * Changing a GTF_permit_access from read-only to writable:
			
 
				+ *  Use SMP-safe bit-setting instruction.
			
 
				+ */
			
 
				+
			
 
				 /*
			
 
				  * Reference to a grant entry in a specified domain's grant table.
			
 
				  */
			
 
				 typedef uint32_t grant_ref_t;
			
 
				 
			
 
				+/*
			
 
				+ * A grant table comprises a packed array of grant entries in one or more
			
 
				+ * page frames shared between Xen and a guest.
			
 
				+ * [XEN]: This field is written by Xen and read by the sharing guest.
			
 
				+ * [GST]: This field is written by the guest and read by Xen.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Version 1 of the grant table entry structure is maintained largely for
			
 
				+ * backwards compatibility.  New guests are recommended to support using
			
 
				+ * version 2 to overcome version 1 limitations, but to default to version 1.
			
 
				+ */
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x0003020a
			
 
				+#define grant_entry_v1 grant_entry
			
 
				+#define grant_entry_v1_t grant_entry_t
			
 
				+#endif
			
 
				+struct grant_entry_v1 {
			
 
				+    /* GTF_xxx: various type and flag information.  [XEN,GST] */
			
 
				+    uint16_t flags;
			
 
				+    /* The domain being granted foreign privileges. [GST] */
			
 
				+    domid_t  domid;
			
 
				+    /*
			
 
				+     * GTF_permit_access: GFN that @domid is allowed to map and access. [GST]
			
 
				+     * GTF_accept_transfer: GFN that @domid is allowed to transfer into. [GST]
			
 
				+     * GTF_transfer_completed: MFN whose ownership transferred by @domid
			
 
				+     *                         (non-translated guests only). [XEN]
			
 
				+     */
			
 
				+    uint32_t frame;
			
 
				+};
			
 
				+typedef struct grant_entry_v1 grant_entry_v1_t;
			
 
				+
			
 
				+/* The first few grant table entries will be preserved across grant table
			
 
				+ * version changes and may be pre-populated at domain creation by tools.
			
 
				+ */
			
 
				+#define GNTTAB_NR_RESERVED_ENTRIES     8
			
 
				+#define GNTTAB_RESERVED_CONSOLE        0
			
 
				+#define GNTTAB_RESERVED_XENSTORE       1
			
 
				+
			
 
				+/*
			
 
				+ * Type of grant entry.
			
 
				+ *  GTF_invalid: This grant entry grants no privileges.
			
 
				+ *  GTF_permit_access: Allow @domid to map/access @frame.
			
 
				+ *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
			
 
				+ *                       to this guest. Xen writes the page number to @frame.
			
 
				+ *  GTF_transitive: Allow @domid to transitively access a subrange of
			
 
				+ *                  @trans_grant in @trans_domid.  No mappings are allowed.
			
 
				+ */
			
 
				+#define GTF_invalid         (0U<<0)
			
 
				+#define GTF_permit_access   (1U<<0)
			
 
				+#define GTF_accept_transfer (2U<<0)
			
 
				+#define GTF_transitive      (3U<<0)
			
 
				+#define GTF_type_mask       (3U<<0)
			
 
				+
			
 
				+/*
			
 
				+ * Subflags for GTF_permit_access and GTF_transitive.
			
 
				+ *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
			
 
				+ *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
			
 
				+ *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
			
 
				+ * Further subflags for GTF_permit_access only.
			
 
				+ *  GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags to be used for
			
 
				+ *                             mappings of the grant [GST]
			
 
				+ *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
			
 
				+ *                will only be allowed to copy from the grant, and not
			
 
				+ *                map it. [GST]
			
 
				+ */
			
 
				+#define _GTF_readonly       (2)
			
 
				+#define GTF_readonly        (1U<<_GTF_readonly)
			
 
				+#define _GTF_reading        (3)
			
 
				+#define GTF_reading         (1U<<_GTF_reading)
			
 
				+#define _GTF_writing        (4)
			
 
				+#define GTF_writing         (1U<<_GTF_writing)
			
 
				+#define _GTF_PWT            (5)
			
 
				+#define GTF_PWT             (1U<<_GTF_PWT)
			
 
				+#define _GTF_PCD            (6)
			
 
				+#define GTF_PCD             (1U<<_GTF_PCD)
			
 
				+#define _GTF_PAT            (7)
			
 
				+#define GTF_PAT             (1U<<_GTF_PAT)
			
 
				+#define _GTF_sub_page       (8)
			
 
				+#define GTF_sub_page        (1U<<_GTF_sub_page)
			
 
				+
			
 
				+/*
			
 
				+ * Subflags for GTF_accept_transfer:
			
 
				+ *  GTF_transfer_committed: Xen sets this flag to indicate that it is committed
			
 
				+ *      to transferring ownership of a page frame. When a guest sees this flag
			
 
				+ *      it must /not/ modify the grant entry until GTF_transfer_completed is
			
 
				+ *      set by Xen.
			
 
				+ *  GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
			
 
				+ *      after reading GTF_transfer_committed. Xen will always write the frame
			
 
				+ *      address, followed by ORing this flag, in a timely manner.
			
 
				+ */
			
 
				+#define _GTF_transfer_committed (2)
			
 
				+#define GTF_transfer_committed  (1U<<_GTF_transfer_committed)
			
 
				+#define _GTF_transfer_completed (3)
			
 
				+#define GTF_transfer_completed  (1U<<_GTF_transfer_completed)
			
 
				+
			
 
				+/*
			
 
				+ * Version 2 grant table entries.  These fulfil the same role as
			
 
				+ * version 1 entries, but can represent more complicated operations.
			
 
				+ * Any given domain will have either a version 1 or a version 2 table,
			
 
				+ * and every entry in the table will be the same version.
			
 
				+ *
			
 
				+ * The interface by which domains use grant references does not depend
			
 
				+ * on the grant table version in use by the other domain.
			
 
				+ */
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
			
 
				+/*
			
 
				+ * Version 1 and version 2 grant entries share a common prefix.  The
			
 
				+ * fields of the prefix are documented as part of struct
			
 
				+ * grant_entry_v1.
			
 
				+ */
			
 
				+struct grant_entry_header {
			
 
				+    uint16_t flags;
			
 
				+    domid_t  domid;
			
 
				+};
			
 
				+typedef struct grant_entry_header grant_entry_header_t;
			
 
				+
			
 
				+/*
			
 
				+ * Version 2 of the grant entry structure.
			
 
				+ */
			
 
				+union grant_entry_v2 {
			
 
				+    grant_entry_header_t hdr;
			
 
				+
			
 
				+    /*
			
 
				+     * This member is used for V1-style full page grants, where either:
			
 
				+     *
			
 
				+     * -- hdr.type is GTF_accept_transfer, or
			
 
				+     * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
			
 
				+     *
			
 
				+     * In that case, the frame field has the same semantics as the
			
 
				+     * field of the same name in the V1 entry structure.
			
 
				+     */
			
 
				+    struct {
			
 
				+        grant_entry_header_t hdr;
			
 
				+        uint32_t pad0;
			
 
				+        uint64_t frame;
			
 
				+    } full_page;
			
 
				+
			
 
				+    /*
			
 
				+     * If the grant type is GTF_permit_access and GTF_sub_page is set,
			
 
				+     * @domid is allowed to access bytes [@page_off,@page_off+@length)
			
 
				+     * in frame @frame.
			
 
				+     */
			
 
				+    struct {
			
 
				+        grant_entry_header_t hdr;
			
 
				+        uint16_t page_off;
			
 
				+        uint16_t length;
			
 
				+        uint64_t frame;
			
 
				+    } sub_page;
			
 
				+
			
 
				+    /*
			
 
				+     * If the grant is GTF_transitive, @domid is allowed to use the
			
 
				+     * grant @gref in domain @trans_domid, as if it was the local
			
 
				+     * domain.  Obviously, the transitive access must be compatible
			
 
				+     * with the original grant.
			
 
				+     *
			
 
				+     * The current version of Xen does not allow transitive grants
			
 
				+     * to be mapped.
			
 
				+     */
			
 
				+    struct {
			
 
				+        grant_entry_header_t hdr;
			
 
				+        domid_t trans_domid;
			
 
				+        uint16_t pad0;
			
 
				+        grant_ref_t gref;
			
 
				+    } transitive;
			
 
				+
			
 
				+    uint32_t __spacer[4]; /* Pad to a power of two */
			
 
				+};
			
 
				+typedef union grant_entry_v2 grant_entry_v2_t;
			
 
				+
			
 
				+typedef uint16_t grant_status_t;
			
 
				+
			
 
				+#endif /* __XEN_INTERFACE_VERSION__ */
			
 
				+
			
 
				+/***********************************
			
 
				+ * GRANT TABLE QUERIES AND USES
			
 
				+ */
			
 
				+
			
 
				+/* ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd,
			
 
				+ * `                           void *args,
			
 
				+ * `                           unsigned int count)
			
 
				+ * `
			
 
				+ *
			
 
				+ * @args points to an array of a per-command data structure. The array
			
 
				+ * has @count members
			
 
				+ */
			
 
				+
			
 
				+/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */
			
 
				+#define GNTTABOP_map_grant_ref        0
			
 
				+#define GNTTABOP_unmap_grant_ref      1
			
 
				+#define GNTTABOP_setup_table          2
			
 
				+#define GNTTABOP_dump_table           3
			
 
				+#define GNTTABOP_transfer             4
			
 
				+#define GNTTABOP_copy                 5
			
 
				+#define GNTTABOP_query_size           6
			
 
				+#define GNTTABOP_unmap_and_replace    7
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
			
 
				+#define GNTTABOP_set_version          8
			
 
				+#define GNTTABOP_get_status_frames    9
			
 
				+#define GNTTABOP_get_version          10
			
 
				+#define GNTTABOP_swap_grant_ref	      11
			
 
				+#define GNTTABOP_cache_flush	      12
			
 
				+#endif /* __XEN_INTERFACE_VERSION__ */
			
 
				+/* ` } */
			
 
				+
			
 
				+/*
			
 
				+ * Handle to track a mapping created via a grant reference.
			
 
				+ */
			
 
				+typedef uint32_t grant_handle_t;
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
			
 
				+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
			
 
				+ * that must be presented later to destroy the mapping(s). On error, <status>
			
 
				+ * is a negative status code.
			
 
				+ * NOTES:
			
 
				+ *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
			
 
				+ *     via which I/O devices may access the granted frame.
			
 
				+ *  2. If GNTMAP_host_map is specified then a mapping will be added at
			
 
				+ *     either a host virtual address in the current address space, or at
			
 
				+ *     a PTE at the specified machine address.  The type of mapping to
			
 
				+ *     perform is selected through the GNTMAP_contains_pte flag, and the
			
 
				+ *     address is specified in <host_addr>.
			
 
				+ *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
			
 
				+ *     host mapping is destroyed by other means then it is *NOT* guaranteed
			
 
				+ *     to be accounted to the correct grant reference!
			
 
				+ */
			
 
				+struct gnttab_map_grant_ref {
			
 
				+    /* IN parameters. */
			
 
				+    uint64_t host_addr;
			
 
				+    uint32_t flags;               /* GNTMAP_* */
			
 
				+    grant_ref_t ref;
			
 
				+    domid_t  dom;
			
 
				+    /* OUT parameters. */
			
 
				+    int16_t  status;              /* => enum grant_status */
			
 
				+    grant_handle_t handle;
			
 
				+    uint64_t dev_bus_addr;
			
 
				+};
			
 
				+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
			
 
				+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
			
 
				+ * field is ignored. If non-zero, they must refer to a device/host mapping
			
 
				+ * that is tracked by <handle>
			
 
				+ * NOTES:
			
 
				+ *  1. The call may fail in an undefined manner if either mapping is not
			
 
				+ *     tracked by <handle>.
			
 
				+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
			
 
				+ *     mappings will remain in the device or host TLBs.
			
 
				+ */
			
 
				+struct gnttab_unmap_grant_ref {
			
 
				+    /* IN parameters. */
			
 
				+    uint64_t host_addr;
			
 
				+    uint64_t dev_bus_addr;
			
 
				+    grant_handle_t handle;
			
 
				+    /* OUT parameters. */
			
 
				+    int16_t  status;              /* => enum grant_status */
			
 
				+};
			
 
				+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
			
 
				+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
			
 
				+ * Only <nr_frames> addresses are written, even if the table is larger.
			
 
				+ * NOTES:
			
 
				+ *  1. <dom> may be specified as DOMID_SELF.
			
 
				+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
			
 
				+ *  3. Xen may not support more than a single grant-table page per domain.
			
 
				+ */
			
 
				+struct gnttab_setup_table {
			
 
				+    /* IN parameters. */
			
 
				+    domid_t  dom;
			
 
				+    uint32_t nr_frames;
			
 
				+    /* OUT parameters. */
			
 
				+    int16_t  status;              /* => enum grant_status */
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040300
			
 
				+    XEN_GUEST_HANDLE(ulong) frame_list;
			
 
				+#else
			
 
				+    XEN_GUEST_HANDLE(xen_pfn_t) frame_list;
			
 
				+#endif
			
 
				+};
			
 
				+typedef struct gnttab_setup_table gnttab_setup_table_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
			
 
				+ * xen console. Debugging use only.
			
 
				+ */
			
 
				+struct gnttab_dump_table {
			
 
				+    /* IN parameters. */
			
 
				+    domid_t dom;
			
 
				+    /* OUT parameters. */
			
 
				+    int16_t status;               /* => enum grant_status */
			
 
				+};
			
 
				+typedef struct gnttab_dump_table gnttab_dump_table_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_transfer: Transfer <frame> to a foreign domain. The foreign domain
			
 
				+ * has previously registered its interest in the transfer via <domid, ref>.
			
 
				+ *
			
 
				+ * Note that, even if the transfer fails, the specified page no longer belongs
			
 
				+ * to the calling domain *unless* the error is GNTST_bad_page.
			
 
				+ *
			
 
				+ * Note further that only PV guests can use this operation.
			
 
				+ */
			
 
				+struct gnttab_transfer {
			
 
				+    /* IN parameters. */
			
 
				+    xen_pfn_t     mfn;
			
 
				+    domid_t       domid;
			
 
				+    grant_ref_t   ref;
			
 
				+    /* OUT parameters. */
			
 
				+    int16_t       status;
			
 
				+};
			
 
				+typedef struct gnttab_transfer gnttab_transfer_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_copy: Hypervisor based copy
			
 
				+ * source and destinations can be either MFNs or, for foreign domains,
			
 
				+ * grant references. The foreign domain has to grant read/write access
			
 
				+ * in its grant table.
			
 
				+ *
			
 
				+ * The flags specify what type source and destinations are (either MFN
			
 
				+ * or grant reference).
			
 
				+ *
			
 
				+ * Note that this can also be used to copy data between two domains
			
 
				+ * via a third party if the source and destination domains had previously
			
 
				+ * granted appropriate access to their pages to the third party.
			
 
				+ *
			
 
				+ * source_offset specifies an offset in the source frame, dest_offset
			
 
				+ * the offset in the target frame and  len specifies the number of
			
 
				+ * bytes to be copied.
			
 
				+ */
			
 
				+
			
 
				+#define _GNTCOPY_source_gref      (0)
			
 
				+#define GNTCOPY_source_gref       (1<<_GNTCOPY_source_gref)
			
 
				+#define _GNTCOPY_dest_gref        (1)
			
 
				+#define GNTCOPY_dest_gref         (1<<_GNTCOPY_dest_gref)
			
 
				+
			
 
				+struct gnttab_copy {
			
 
				+    /* IN parameters. */
			
 
				+    struct gnttab_copy_ptr {
			
 
				+        union {
			
 
				+            grant_ref_t ref;
			
 
				+            xen_pfn_t   gmfn;
			
 
				+        } u;
			
 
				+        domid_t  domid;
			
 
				+        uint16_t offset;
			
 
				+    } source, dest;
			
 
				+    uint16_t      len;
			
 
				+    uint16_t      flags;          /* GNTCOPY_* */
			
 
				+    /* OUT parameters. */
			
 
				+    int16_t       status;
			
 
				+};
			
 
				+typedef struct gnttab_copy  gnttab_copy_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
			
 
				+ * grant table.
			
 
				+ * NOTES:
			
 
				+ *  1. <dom> may be specified as DOMID_SELF.
			
 
				+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
			
 
				+ */
			
 
				+struct gnttab_query_size {
			
 
				+    /* IN parameters. */
			
 
				+    domid_t  dom;
			
 
				+    /* OUT parameters. */
			
 
				+    uint32_t nr_frames;
			
 
				+    uint32_t max_nr_frames;
			
 
				+    int16_t  status;              /* => enum grant_status */
			
 
				+};
			
 
				+typedef struct gnttab_query_size gnttab_query_size_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
			
 
				+ * tracked by <handle> but atomically replace the page table entry with one
			
 
				+ * pointing to the machine address under <new_addr>.  <new_addr> will be
			
 
				+ * redirected to the null entry.
			
 
				+ * NOTES:
			
 
				+ *  1. The call may fail in an undefined manner if either mapping is not
			
 
				+ *     tracked by <handle>.
			
 
				+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
			
 
				+ *     mappings will remain in the device or host TLBs.
			
 
				+ */
			
 
				+struct gnttab_unmap_and_replace {
			
 
				+    /* IN parameters. */
			
 
				+    uint64_t host_addr;
			
 
				+    uint64_t new_addr;
			
 
				+    grant_handle_t handle;
			
 
				+    /* OUT parameters. */
			
 
				+    int16_t  status;              /* => enum grant_status */
			
 
				+};
			
 
				+typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
			
 
				+/*
			
 
				+ * GNTTABOP_set_version: Request a particular version of the grant
			
 
				+ * table shared table structure.  This operation may be used to toggle
			
 
				+ * between different versions, but must be performed while no grants
			
 
				+ * are active.  The only defined versions are 1 and 2.
			
 
				+ */
			
 
				+struct gnttab_set_version {
			
 
				+    /* IN/OUT parameters */
			
 
				+    uint32_t version;
			
 
				+};
			
 
				+typedef struct gnttab_set_version gnttab_set_version_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
			
 
				+
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
			
 
				+ * status for <dom>. In grant format version 2, the status is separated
			
 
				+ * from the other shared grant fields to allow more efficient synchronization
			
 
				+ * using barriers instead of atomic cmpxchg operations.
			
 
				+ * <nr_frames> specifies the size of vector <frame_list>.
			
 
				+ * The frame addresses are returned in the <frame_list>.
			
 
				+ * Only <nr_frames> addresses are returned, even if the table is larger.
			
 
				+ * NOTES:
			
 
				+ *  1. <dom> may be specified as DOMID_SELF.
			
 
				+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
			
 
				+ */
			
 
				+struct gnttab_get_status_frames {
			
 
				+    /* IN parameters. */
			
 
				+    uint32_t nr_frames;
			
 
				+    domid_t  dom;
			
 
				+    /* OUT parameters. */
			
 
				+    int16_t  status;              /* => enum grant_status */
			
 
				+    XEN_GUEST_HANDLE(uint64_t) frame_list;
			
 
				+};
			
 
				+typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_get_version: Get the grant table version which is in
			
 
				+ * effect for domain <dom>.
			
 
				+ */
			
 
				+struct gnttab_get_version {
			
 
				+    /* IN parameters */
			
 
				+    domid_t dom;
			
 
				+    uint16_t pad;
			
 
				+    /* OUT parameters */
			
 
				+    uint32_t version;
			
 
				+};
			
 
				+typedef struct gnttab_get_version gnttab_get_version_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t);
			
 
				+
			
 
				+/*
			
 
				+ * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries.
			
 
				+ */
			
 
				+struct gnttab_swap_grant_ref {
			
 
				+    /* IN parameters */
			
 
				+    grant_ref_t ref_a;
			
 
				+    grant_ref_t ref_b;
			
 
				+    /* OUT parameters */
			
 
				+    int16_t status;             /* => enum grant_status */
			
 
				+};
			
 
				+typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t);
			
 
				+
			
 
				+/*
			
 
				+ * Issue one or more cache maintenance operations on a portion of a
			
 
				+ * page granted to the calling domain by a foreign domain.
			
 
				+ */
			
 
				+struct gnttab_cache_flush {
			
 
				+    union {
			
 
				+        uint64_t dev_bus_addr;
			
 
				+        grant_ref_t ref;
			
 
				+    } a;
			
 
				+    uint16_t offset; /* offset from start of grant */
			
 
				+    uint16_t length; /* size within the grant */
			
 
				+#define GNTTAB_CACHE_CLEAN          (1u<<0)
			
 
				+#define GNTTAB_CACHE_INVAL          (1u<<1)
			
 
				+#define GNTTAB_CACHE_SOURCE_GREF    (1u<<31)
			
 
				+    uint32_t op;
			
 
				+};
			
 
				+typedef struct gnttab_cache_flush gnttab_cache_flush_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(gnttab_cache_flush_t);
			
 
				+
			
 
				+#endif /* __XEN_INTERFACE_VERSION__ */
			
 
				+
			
 
				+/*
			
 
				+ * Bitfield values for gnttab_map_grant_ref.flags.
			
 
				+ */
			
 
				+ /* Map the grant entry for access by I/O devices. */
			
 
				+#define _GNTMAP_device_map      (0)
			
 
				+#define GNTMAP_device_map       (1<<_GNTMAP_device_map)
			
 
				+ /* Map the grant entry for access by host CPUs. */
			
 
				+#define _GNTMAP_host_map        (1)
			
 
				+#define GNTMAP_host_map         (1<<_GNTMAP_host_map)
			
 
				+ /* Accesses to the granted frame will be restricted to read-only access. */
			
 
				+#define _GNTMAP_readonly        (2)
			
 
				+#define GNTMAP_readonly         (1<<_GNTMAP_readonly)
			
 
				+ /*
			
 
				+  * GNTMAP_host_map subflag:
			
 
				+  *  0 => The host mapping is usable only by the guest OS.
			
 
				+  *  1 => The host mapping is usable by guest OS + current application.
			
 
				+  */
			
 
				+#define _GNTMAP_application_map (3)
			
 
				+#define GNTMAP_application_map  (1<<_GNTMAP_application_map)
			
 
				+
			
 
				+ /*
			
 
				+  * GNTMAP_contains_pte subflag:
			
 
				+  *  0 => This map request contains a host virtual address.
			
 
				+  *  1 => This map request contains the machine address of the PTE to update.
			
 
				+  */
			
 
				+#define _GNTMAP_contains_pte    (4)
			
 
				+#define GNTMAP_contains_pte     (1<<_GNTMAP_contains_pte)
			
 
				+
			
 
				+/*
			
 
				+ * Bits to be placed in guest kernel available PTE bits (architecture
			
 
				+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
			
 
				+ */
			
 
				+#define _GNTMAP_guest_avail0    (16)
			
 
				+#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0)
			
 
				+
			
 
				+/*
			
 
				+ * Values for error status returns. All errors are -ve.
			
 
				+ */
			
 
				+/* ` enum grant_status { */
			
 
				+#define GNTST_okay             (0)  /* Normal return.                        */
			
 
				+#define GNTST_general_error    (-1) /* General undefined error.              */
			
 
				+#define GNTST_bad_domain       (-2) /* Unrecognised domain id.               */
			
 
				+#define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
			
 
				+#define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle. */
			
 
				+#define GNTST_bad_virt_addr    (-5) /* Inappropriate virtual address to map. */
			
 
				+#define GNTST_bad_dev_addr     (-6) /* Inappropriate device address to unmap.*/
			
 
				+#define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
			
 
				+#define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
			
 
				+#define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */
			
 
				+#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
			
 
				+#define GNTST_address_too_big (-11) /* transfer page address too large.      */
			
 
				+#define GNTST_eagain          (-12) /* Operation not done; try again.        */
			
 
				+#define GNTST_no_space        (-13) /* Out of space (handles etc).           */
			
 
				+/* ` } */
			
 
				+
			
 
				+#define GNTTABOP_error_msgs {                   \
			
 
				+    "okay",                                     \
			
 
				+    "undefined error",                          \
			
 
				+    "unrecognised domain id",                   \
			
 
				+    "invalid grant reference",                  \
			
 
				+    "invalid mapping handle",                   \
			
 
				+    "invalid virtual address",                  \
			
 
				+    "invalid device address",                   \
			
 
				+    "no spare translation slot in the I/O MMU", \
			
 
				+    "permission denied",                        \
			
 
				+    "bad page",                                 \
			
 
				+    "copy arguments cross page boundary",       \
			
 
				+    "page address size too large",              \
			
 
				+    "operation not done; try again",            \
			
 
				+    "out of space",                             \
			
 
				+}
			
 
				+
			
 
				 #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/hvm/hvm_op.h
+++ b/include/hw/xen/interface/hvm/hvm_op.h
@@ -0,0 +1,395 @@
 
				+/*
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2007, Keir Fraser
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
			
 
				+#define __XEN_PUBLIC_HVM_HVM_OP_H__
			
 
				+
			
 
				+#include "../xen.h"
			
 
				+#include "../trace.h"
			
 
				+#include "../event_channel.h"
			
 
				+
			
 
				+/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
			
 
				+#define HVMOP_set_param           0
			
 
				+#define HVMOP_get_param           1
			
 
				+struct xen_hvm_param {
			
 
				+    domid_t  domid;    /* IN */
			
 
				+    uint16_t pad;
			
 
				+    uint32_t index;    /* IN */
			
 
				+    uint64_t value;    /* IN/OUT */
			
 
				+};
			
 
				+typedef struct xen_hvm_param xen_hvm_param_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t);
			
 
				+
			
 
				+struct xen_hvm_altp2m_suppress_ve {
			
 
				+    uint16_t view;
			
 
				+    uint8_t suppress_ve; /* Boolean type. */
			
 
				+    uint8_t pad1;
			
 
				+    uint32_t pad2;
			
 
				+    uint64_t gfn;
			
 
				+};
			
 
				+
			
 
				+struct xen_hvm_altp2m_suppress_ve_multi {
			
 
				+    uint16_t view;
			
 
				+    uint8_t suppress_ve; /* Boolean type. */
			
 
				+    uint8_t pad1;
			
 
				+    int32_t first_error; /* Should be set to 0. */
			
 
				+    uint64_t first_gfn; /* Value may be updated. */
			
 
				+    uint64_t last_gfn;
			
 
				+    uint64_t first_error_gfn; /* Gfn of the first error. */
			
 
				+};
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040900
			
 
				+
			
 
				+/* Set the logical level of one of a domain's PCI INTx wires. */
			
 
				+#define HVMOP_set_pci_intx_level  2
			
 
				+struct xen_hvm_set_pci_intx_level {
			
 
				+    /* Domain to be updated. */
			
 
				+    domid_t  domid;
			
 
				+    /* PCI INTx identification in PCI topology (domain:bus:device:intx). */
			
 
				+    uint8_t  domain, bus, device, intx;
			
 
				+    /* Assertion level (0 = unasserted, 1 = asserted). */
			
 
				+    uint8_t  level;
			
 
				+};
			
 
				+typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t);
			
 
				+
			
 
				+/* Set the logical level of one of a domain's ISA IRQ wires. */
			
 
				+#define HVMOP_set_isa_irq_level   3
			
 
				+struct xen_hvm_set_isa_irq_level {
			
 
				+    /* Domain to be updated. */
			
 
				+    domid_t  domid;
			
 
				+    /* ISA device identification, by ISA IRQ (0-15). */
			
 
				+    uint8_t  isa_irq;
			
 
				+    /* Assertion level (0 = unasserted, 1 = asserted). */
			
 
				+    uint8_t  level;
			
 
				+};
			
 
				+typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t);
			
 
				+
			
 
				+#define HVMOP_set_pci_link_route  4
			
 
				+struct xen_hvm_set_pci_link_route {
			
 
				+    /* Domain to be updated. */
			
 
				+    domid_t  domid;
			
 
				+    /* PCI link identifier (0-3). */
			
 
				+    uint8_t  link;
			
 
				+    /* ISA IRQ (1-15), or 0 (disable link). */
			
 
				+    uint8_t  isa_irq;
			
 
				+};
			
 
				+typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t);
			
 
				+
			
 
				+#endif /* __XEN_INTERFACE_VERSION__ < 0x00040900 */
			
 
				+
			
 
				+/* Flushes all VCPU TLBs: @arg must be NULL. */
			
 
				+#define HVMOP_flush_tlbs          5
			
 
				+
			
 
				+/*
			
 
				+ * hvmmem_type_t should not be defined when generating the corresponding
			
 
				+ * compat header. This will ensure that the improperly named HVMMEM_(*)
			
 
				+ * values are defined only once.
			
 
				+ */
			
 
				+#ifndef XEN_GENERATING_COMPAT_HEADERS
			
 
				+
			
 
				+typedef enum {
			
 
				+    HVMMEM_ram_rw,             /* Normal read/write guest RAM */
			
 
				+    HVMMEM_ram_ro,             /* Read-only; writes are discarded */
			
 
				+    HVMMEM_mmio_dm,            /* Reads and writes go to the device model */
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040700
			
 
				+    HVMMEM_mmio_write_dm,      /* Read-only; writes go to the device model */
			
 
				+#else
			
 
				+    HVMMEM_unused,             /* Placeholder; setting memory to this type
			
 
				+                                  will fail for code after 4.7.0 */
			
 
				+#endif
			
 
				+    HVMMEM_ioreq_server        /* Memory type claimed by an ioreq server; type
			
 
				+                                  changes to this value are only allowed after
			
 
				+                                  an ioreq server has claimed its ownership.
			
 
				+                                  Only pages with HVMMEM_ram_rw are allowed to
			
 
				+                                  change to this type; conversely, pages with
			
 
				+                                  this type are only allowed to be changed back
			
 
				+                                  to HVMMEM_ram_rw. */
			
 
				+} hvmmem_type_t;
			
 
				+
			
 
				+#endif /* XEN_GENERATING_COMPAT_HEADERS */
			
 
				+
			
 
				+/* Hint from PV drivers for pagetable destruction. */
			
 
				+#define HVMOP_pagetable_dying        9
			
 
				+struct xen_hvm_pagetable_dying {
			
 
				+    /* Domain with a pagetable about to be destroyed. */
			
 
				+    domid_t  domid;
			
 
				+    uint16_t pad[3]; /* align next field on 8-byte boundary */
			
 
				+    /* guest physical address of the toplevel pagetable dying */
			
 
				+    uint64_t gpa;
			
 
				+};
			
 
				+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t);
			
 
				+
			
 
				+/* Get the current Xen time, in nanoseconds since system boot. */
			
 
				+#define HVMOP_get_time              10
			
 
				+struct xen_hvm_get_time {
			
 
				+    uint64_t now;      /* OUT */
			
 
				+};
			
 
				+typedef struct xen_hvm_get_time xen_hvm_get_time_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t);
			
 
				+
			
 
				+#define HVMOP_xentrace              11
			
 
				+struct xen_hvm_xentrace {
			
 
				+    uint16_t event, extra_bytes;
			
 
				+    uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)];
			
 
				+};
			
 
				+typedef struct xen_hvm_xentrace xen_hvm_xentrace_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t);
			
 
				+
			
 
				+/* Following tools-only interfaces may change in future. */
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+
			
 
				+/* Deprecated by XENMEM_access_op_set_access */
			
 
				+#define HVMOP_set_mem_access        12
			
 
				+
			
 
				+/* Deprecated by XENMEM_access_op_get_access */
			
 
				+#define HVMOP_get_mem_access        13
			
 
				+
			
 
				+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
			
 
				+
			
 
				+#define HVMOP_get_mem_type    15
			
 
				+/* Return hvmmem_type_t for the specified pfn. */
			
 
				+struct xen_hvm_get_mem_type {
			
 
				+    /* Domain to be queried. */
			
 
				+    domid_t domid;
			
 
				+    /* OUT variable. */
			
 
				+    uint16_t mem_type;
			
 
				+    uint16_t pad[2]; /* align next field on 8-byte boundary */
			
 
				+    /* IN variable. */
			
 
				+    uint64_t pfn;
			
 
				+};
			
 
				+typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t);
			
 
				+
			
 
				+/* Following tools-only interfaces may change in future. */
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+
			
 
				+/*
			
 
				+ * Definitions relating to DMOP_create_ioreq_server. (Defined here for
			
 
				+ * backwards compatibility).
			
 
				+ */
			
 
				+
			
 
				+#define HVM_IOREQSRV_BUFIOREQ_OFF    0
			
 
				+#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1
			
 
				+/*
			
 
				+ * Use this when read_pointer gets updated atomically and
			
 
				+ * the pointer pair gets read atomically:
			
 
				+ */
			
 
				+#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2
			
 
				+
			
 
				+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
			
 
				+
			
 
				+#if defined(__i386__) || defined(__x86_64__)
			
 
				+
			
 
				+/*
			
 
				+ * HVMOP_set_evtchn_upcall_vector: Set a <vector> that should be used for event
			
 
				+ *                                 channel upcalls on the specified <vcpu>. If set,
			
 
				+ *                                 this vector will be used in preference to the
			
 
				+ *                                 domain global callback (see
			
 
				+ *                                 HVM_PARAM_CALLBACK_IRQ).
			
 
				+ */
			
 
				+#define HVMOP_set_evtchn_upcall_vector 23
			
 
				+struct xen_hvm_evtchn_upcall_vector {
			
 
				+    uint32_t vcpu;
			
 
				+    uint8_t vector;
			
 
				+};
			
 
				+typedef struct xen_hvm_evtchn_upcall_vector xen_hvm_evtchn_upcall_vector_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_evtchn_upcall_vector_t);
			
 
				+
			
 
				+#endif /* defined(__i386__) || defined(__x86_64__) */
			
 
				+
			
 
				+#define HVMOP_guest_request_vm_event 24
			
 
				+
			
 
				+/* HVMOP_altp2m: perform altp2m state operations */
			
 
				+#define HVMOP_altp2m 25
			
 
				+
			
 
				+#define HVMOP_ALTP2M_INTERFACE_VERSION 0x00000001
			
 
				+
			
 
				+struct xen_hvm_altp2m_domain_state {
			
 
				+    /* IN or OUT variable on/off */
			
 
				+    uint8_t state;
			
 
				+};
			
 
				+typedef struct xen_hvm_altp2m_domain_state xen_hvm_altp2m_domain_state_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_domain_state_t);
			
 
				+
			
 
				+struct xen_hvm_altp2m_vcpu_enable_notify {
			
 
				+    uint32_t vcpu_id;
			
 
				+    uint32_t pad;
			
 
				+    /* #VE info area gfn */
			
 
				+    uint64_t gfn;
			
 
				+};
			
 
				+typedef struct xen_hvm_altp2m_vcpu_enable_notify xen_hvm_altp2m_vcpu_enable_notify_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_vcpu_enable_notify_t);
			
 
				+
			
 
				+struct xen_hvm_altp2m_vcpu_disable_notify {
			
 
				+    uint32_t vcpu_id;
			
 
				+};
			
 
				+typedef struct xen_hvm_altp2m_vcpu_disable_notify xen_hvm_altp2m_vcpu_disable_notify_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_vcpu_disable_notify_t);
			
 
				+
			
 
				+struct xen_hvm_altp2m_view {
			
 
				+    /* IN/OUT variable */
			
 
				+    uint16_t view;
			
 
				+    uint16_t hvmmem_default_access; /* xenmem_access_t */
			
 
				+};
			
 
				+typedef struct xen_hvm_altp2m_view xen_hvm_altp2m_view_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_view_t);
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040a00
			
 
				+struct xen_hvm_altp2m_set_mem_access {
			
 
				+    /* view */
			
 
				+    uint16_t view;
			
 
				+    /* Memory type */
			
 
				+    uint16_t access; /* xenmem_access_t */
			
 
				+    uint32_t pad;
			
 
				+    /* gfn */
			
 
				+    uint64_t gfn;
			
 
				+};
			
 
				+typedef struct xen_hvm_altp2m_set_mem_access xen_hvm_altp2m_set_mem_access_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_set_mem_access_t);
			
 
				+#endif /* __XEN_INTERFACE_VERSION__ < 0x00040a00 */
			
 
				+
			
 
				+struct xen_hvm_altp2m_mem_access {
			
 
				+    /* view */
			
 
				+    uint16_t view;
			
 
				+    /* Memory type */
			
 
				+    uint16_t access; /* xenmem_access_t */
			
 
				+    uint32_t pad;
			
 
				+    /* gfn */
			
 
				+    uint64_t gfn;
			
 
				+};
			
 
				+typedef struct xen_hvm_altp2m_mem_access xen_hvm_altp2m_mem_access_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_mem_access_t);
			
 
				+
			
 
				+struct xen_hvm_altp2m_set_mem_access_multi {
			
 
				+    /* view */
			
 
				+    uint16_t view;
			
 
				+    uint16_t pad;
			
 
				+    /* Number of pages */
			
 
				+    uint32_t nr;
			
 
				+    /*
			
 
				+     * Used for continuation purposes.
			
 
				+     * Must be set to zero upon initial invocation.
			
 
				+     */
			
 
				+    uint64_t opaque;
			
 
				+    /* List of pfns to set access for */
			
 
				+    XEN_GUEST_HANDLE(const_uint64) pfn_list;
			
 
				+    /* Corresponding list of access settings for pfn_list */
			
 
				+    XEN_GUEST_HANDLE(const_uint8) access_list;
			
 
				+};
			
 
				+
			
 
				+struct xen_hvm_altp2m_change_gfn {
			
 
				+    /* view */
			
 
				+    uint16_t view;
			
 
				+    uint16_t pad1;
			
 
				+    uint32_t pad2;
			
 
				+    /* old gfn */
			
 
				+    uint64_t old_gfn;
			
 
				+    /* new gfn, INVALID_GFN (~0UL) means revert */
			
 
				+    uint64_t new_gfn;
			
 
				+};
			
 
				+typedef struct xen_hvm_altp2m_change_gfn xen_hvm_altp2m_change_gfn_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_change_gfn_t);
			
 
				+
			
 
				+struct xen_hvm_altp2m_get_vcpu_p2m_idx {
			
 
				+    uint32_t vcpu_id;
			
 
				+    uint16_t altp2m_idx;
			
 
				+};
			
 
				+
			
 
				+struct xen_hvm_altp2m_set_visibility {
			
 
				+    uint16_t altp2m_idx;
			
 
				+    uint8_t visible;
			
 
				+    uint8_t pad;
			
 
				+};
			
 
				+
			
 
				+struct xen_hvm_altp2m_op {
			
 
				+    uint32_t version;   /* HVMOP_ALTP2M_INTERFACE_VERSION */
			
 
				+    uint32_t cmd;
			
 
				+/* Get/set the altp2m state for a domain */
			
 
				+#define HVMOP_altp2m_get_domain_state     1
			
 
				+#define HVMOP_altp2m_set_domain_state     2
			
 
				+/* Set a given VCPU to receive altp2m event notifications */
			
 
				+#define HVMOP_altp2m_vcpu_enable_notify   3
			
 
				+/* Create a new view */
			
 
				+#define HVMOP_altp2m_create_p2m           4
			
 
				+/* Destroy a view */
			
 
				+#define HVMOP_altp2m_destroy_p2m          5
			
 
				+/* Switch view for an entire domain */
			
 
				+#define HVMOP_altp2m_switch_p2m           6
			
 
				+/* Notify that a page of memory is to have specific access types */
			
 
				+#define HVMOP_altp2m_set_mem_access       7
			
 
				+/* Change a p2m entry to have a different gfn->mfn mapping */
			
 
				+#define HVMOP_altp2m_change_gfn           8
			
 
				+/* Set access for an array of pages */
			
 
				+#define HVMOP_altp2m_set_mem_access_multi 9
			
 
				+/* Set the "Suppress #VE" bit on a page */
			
 
				+#define HVMOP_altp2m_set_suppress_ve      10
			
 
				+/* Get the "Suppress #VE" bit of a page */
			
 
				+#define HVMOP_altp2m_get_suppress_ve      11
			
 
				+/* Get the access of a page of memory from a certain view */
			
 
				+#define HVMOP_altp2m_get_mem_access       12
			
 
				+/* Disable altp2m event notifications for a given VCPU */
			
 
				+#define HVMOP_altp2m_vcpu_disable_notify  13
			
 
				+/* Get the active vcpu p2m index */
			
 
				+#define HVMOP_altp2m_get_p2m_idx          14
			
 
				+/* Set the "Suppress #VE" bit for a range of pages */
			
 
				+#define HVMOP_altp2m_set_suppress_ve_multi 15
			
 
				+/* Set visibility for a given altp2m view */
			
 
				+#define HVMOP_altp2m_set_visibility       16
			
 
				+    domid_t domain;
			
 
				+    uint16_t pad1;
			
 
				+    uint32_t pad2;
			
 
				+    union {
			
 
				+        struct xen_hvm_altp2m_domain_state         domain_state;
			
 
				+        struct xen_hvm_altp2m_vcpu_enable_notify   enable_notify;
			
 
				+        struct xen_hvm_altp2m_view                 view;
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040a00
			
 
				+        struct xen_hvm_altp2m_set_mem_access       set_mem_access;
			
 
				+#endif /* __XEN_INTERFACE_VERSION__ < 0x00040a00 */
			
 
				+        struct xen_hvm_altp2m_mem_access           mem_access;
			
 
				+        struct xen_hvm_altp2m_change_gfn           change_gfn;
			
 
				+        struct xen_hvm_altp2m_set_mem_access_multi set_mem_access_multi;
			
 
				+        struct xen_hvm_altp2m_suppress_ve          suppress_ve;
			
 
				+        struct xen_hvm_altp2m_suppress_ve_multi    suppress_ve_multi;
			
 
				+        struct xen_hvm_altp2m_vcpu_disable_notify  disable_notify;
			
 
				+        struct xen_hvm_altp2m_get_vcpu_p2m_idx     get_vcpu_p2m_idx;
			
 
				+        struct xen_hvm_altp2m_set_visibility       set_visibility;
			
 
				+        uint8_t pad[64];
			
 
				+    } u;
			
 
				+};
			
 
				+typedef struct xen_hvm_altp2m_op xen_hvm_altp2m_op_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_op_t);
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/hvm/params.h
+++ b/include/hw/xen/interface/hvm/params.h
@@ -0,0 +1,318 @@
 
				+/*
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2007, Keir Fraser
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
			
 
				+#define __XEN_PUBLIC_HVM_PARAMS_H__
			
 
				+
			
 
				+#include "hvm_op.h"
			
 
				+
			
 
				+/* These parameters are deprecated and their meaning is undefined. */
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+
			
 
				+#define HVM_PARAM_PAE_ENABLED                4
			
 
				+#define HVM_PARAM_DM_DOMAIN                 13
			
 
				+#define HVM_PARAM_MEMORY_EVENT_CR0          20
			
 
				+#define HVM_PARAM_MEMORY_EVENT_CR3          21
			
 
				+#define HVM_PARAM_MEMORY_EVENT_CR4          22
			
 
				+#define HVM_PARAM_MEMORY_EVENT_INT3         23
			
 
				+#define HVM_PARAM_NESTEDHVM                 24
			
 
				+#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP  25
			
 
				+#define HVM_PARAM_BUFIOREQ_EVTCHN           26
			
 
				+#define HVM_PARAM_MEMORY_EVENT_MSR          30
			
 
				+
			
 
				+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
			
 
				+
			
 
				+/*
			
 
				+ * Parameter space for HVMOP_{set,get}_param.
			
 
				+ */
			
 
				+
			
 
				+#define HVM_PARAM_CALLBACK_IRQ 0
			
 
				+#define HVM_PARAM_CALLBACK_IRQ_TYPE_MASK xen_mk_ullong(0xFF00000000000000)
			
 
				+/*
			
 
				+ * How should CPU0 event-channel notifications be delivered?
			
 
				+ *
			
 
				+ * If val == 0 then CPU0 event-channel notifications are not delivered.
			
 
				+ * If val != 0, val[63:56] encodes the type, as follows:
			
 
				+ */
			
 
				+
			
 
				+#define HVM_PARAM_CALLBACK_TYPE_GSI      0
			
 
				+/*
			
 
				+ * val[55:0] is a delivery GSI.  GSI 0 cannot be used, as it aliases val == 0,
			
 
				+ * and disables all notifications.
			
 
				+ */
			
 
				+
			
 
				+#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1
			
 
				+/*
			
 
				+ * val[55:0] is a delivery PCI INTx line:
			
 
				+ * Domain = val[47:32], Bus = val[31:16], DevFn = val[15:8], IntX = val[1:0]
			
 
				+ */
			
 
				+
			
 
				+#if defined(__i386__) || defined(__x86_64__)
			
 
				+#define HVM_PARAM_CALLBACK_TYPE_VECTOR   2
			
 
				+/*
			
 
				+ * val[7:0] is a vector number.  Check for XENFEAT_hvm_callback_vector to know
			
 
				+ * if this delivery method is available.
			
 
				+ */
			
 
				+#elif defined(__arm__) || defined(__aarch64__)
			
 
				+#define HVM_PARAM_CALLBACK_TYPE_PPI      2
			
 
				+/*
			
 
				+ * val[55:16] needs to be zero.
			
 
				+ * val[15:8] is interrupt flag of the PPI used by event-channel:
			
 
				+ *  bit 8: the PPI is edge(1) or level(0) triggered
			
 
				+ *  bit 9: the PPI is active low(1) or high(0)
			
 
				+ * val[7:0] is a PPI number used by event-channel.
			
 
				+ * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to
			
 
				+ * the notification is handled by the interrupt controller.
			
 
				+ */
			
 
				+#define HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_MASK      0xFF00
			
 
				+#define HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_LOW_LEVEL 2
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * These are not used by Xen. They are here for convenience of HVM-guest
			
 
				+ * xenbus implementations.
			
 
				+ */
			
 
				+#define HVM_PARAM_STORE_PFN    1
			
 
				+#define HVM_PARAM_STORE_EVTCHN 2
			
 
				+
			
 
				+#define HVM_PARAM_IOREQ_PFN    5
			
 
				+
			
 
				+#define HVM_PARAM_BUFIOREQ_PFN 6
			
 
				+
			
 
				+#if defined(__i386__) || defined(__x86_64__)
			
 
				+
			
 
				+/*
			
 
				+ * Viridian enlightenments
			
 
				+ *
			
 
				+ * (See http://download.microsoft.com/download/A/B/4/AB43A34E-BDD0-4FA6-BDEF-79EEF16E880B/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.docx)
			
 
				+ *
			
 
				+ * To expose viridian enlightenments to the guest set this parameter
			
 
				+ * to the desired feature mask. The base feature set must be present
			
 
				+ * in any valid feature mask.
			
 
				+ */
			
 
				+#define HVM_PARAM_VIRIDIAN     9
			
 
				+
			
 
				+/* Base+Freq viridian feature sets:
			
 
				+ *
			
 
				+ * - Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL)
			
 
				+ * - APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
			
 
				+ * - Virtual Processor index MSR (HV_X64_MSR_VP_INDEX)
			
 
				+ * - Timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and
			
 
				+ *   HV_X64_MSR_APIC_FREQUENCY)
			
 
				+ */
			
 
				+#define _HVMPV_base_freq 0
			
 
				+#define HVMPV_base_freq  (1 << _HVMPV_base_freq)
			
 
				+
			
 
				+/* Feature set modifications */
			
 
				+
			
 
				+/* Disable timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and
			
 
				+ * HV_X64_MSR_APIC_FREQUENCY).
			
 
				+ * This modification restores the viridian feature set to the
			
 
				+ * original 'base' set exposed in releases prior to Xen 4.4.
			
 
				+ */
			
 
				+#define _HVMPV_no_freq 1
			
 
				+#define HVMPV_no_freq  (1 << _HVMPV_no_freq)
			
 
				+
			
 
				+/* Enable Partition Time Reference Counter (HV_X64_MSR_TIME_REF_COUNT) */
			
 
				+#define _HVMPV_time_ref_count 2
			
 
				+#define HVMPV_time_ref_count  (1 << _HVMPV_time_ref_count)
			
 
				+
			
 
				+/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */
			
 
				+#define _HVMPV_reference_tsc 3
			
 
				+#define HVMPV_reference_tsc  (1 << _HVMPV_reference_tsc)
			
 
				+
			
 
				+/* Use Hypercall for remote TLB flush */
			
 
				+#define _HVMPV_hcall_remote_tlb_flush 4
			
 
				+#define HVMPV_hcall_remote_tlb_flush (1 << _HVMPV_hcall_remote_tlb_flush)
			
 
				+
			
 
				+/* Use APIC assist */
			
 
				+#define _HVMPV_apic_assist 5
			
 
				+#define HVMPV_apic_assist (1 << _HVMPV_apic_assist)
			
 
				+
			
 
				+/* Enable crash MSRs */
			
 
				+#define _HVMPV_crash_ctl 6
			
 
				+#define HVMPV_crash_ctl (1 << _HVMPV_crash_ctl)
			
 
				+
			
 
				+/* Enable SYNIC MSRs */
			
 
				+#define _HVMPV_synic 7
			
 
				+#define HVMPV_synic (1 << _HVMPV_synic)
			
 
				+
			
 
				+/* Enable STIMER MSRs */
			
 
				+#define _HVMPV_stimer 8
			
 
				+#define HVMPV_stimer (1 << _HVMPV_stimer)
			
 
				+
			
 
				+/* Use Synthetic Cluster IPI Hypercall */
			
 
				+#define _HVMPV_hcall_ipi 9
			
 
				+#define HVMPV_hcall_ipi (1 << _HVMPV_hcall_ipi)
			
 
				+
			
 
				+/* Enable ExProcessorMasks */
			
 
				+#define _HVMPV_ex_processor_masks 10
			
 
				+#define HVMPV_ex_processor_masks (1 << _HVMPV_ex_processor_masks)
			
 
				+
			
 
				+/* Allow more than 64 VPs */
			
 
				+#define _HVMPV_no_vp_limit 11
			
 
				+#define HVMPV_no_vp_limit (1 << _HVMPV_no_vp_limit)
			
 
				+
			
 
				+/* Enable vCPU hotplug */
			
 
				+#define _HVMPV_cpu_hotplug 12
			
 
				+#define HVMPV_cpu_hotplug (1 << _HVMPV_cpu_hotplug)
			
 
				+
			
 
				+#define HVMPV_feature_mask \
			
 
				+        (HVMPV_base_freq | \
			
 
				+         HVMPV_no_freq | \
			
 
				+         HVMPV_time_ref_count | \
			
 
				+         HVMPV_reference_tsc | \
			
 
				+         HVMPV_hcall_remote_tlb_flush | \
			
 
				+         HVMPV_apic_assist | \
			
 
				+         HVMPV_crash_ctl | \
			
 
				+         HVMPV_synic | \
			
 
				+         HVMPV_stimer | \
			
 
				+         HVMPV_hcall_ipi | \
			
 
				+         HVMPV_ex_processor_masks | \
			
 
				+         HVMPV_no_vp_limit | \
			
 
				+         HVMPV_cpu_hotplug)
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Set mode for virtual timers (currently x86 only):
			
 
				+ *  delay_for_missed_ticks (default):
			
 
				+ *   Do not advance a vcpu's time beyond the correct delivery time for
			
 
				+ *   interrupts that have been missed due to preemption. Deliver missed
			
 
				+ *   interrupts when the vcpu is rescheduled and advance the vcpu's virtual
			
 
				+ *   time stepwise for each one.
			
 
				+ *  no_delay_for_missed_ticks:
			
 
				+ *   As above, missed interrupts are delivered, but guest time always tracks
			
 
				+ *   wallclock (i.e., real) time while doing so.
			
 
				+ *  no_missed_ticks_pending:
			
 
				+ *   No missed interrupts are held pending. Instead, to ensure ticks are
			
 
				+ *   delivered at some non-zero rate, if we detect missed ticks then the
			
 
				+ *   internal tick alarm is not disabled if the VCPU is preempted during the
			
 
				+ *   next tick period.
			
 
				+ *  one_missed_tick_pending:
			
 
				+ *   Missed interrupts are collapsed together and delivered as one 'late tick'.
			
 
				+ *   Guest time always tracks wallclock (i.e., real) time.
			
 
				+ */
			
 
				+#define HVM_PARAM_TIMER_MODE   10
			
 
				+#define HVMPTM_delay_for_missed_ticks    0
			
 
				+#define HVMPTM_no_delay_for_missed_ticks 1
			
 
				+#define HVMPTM_no_missed_ticks_pending   2
			
 
				+#define HVMPTM_one_missed_tick_pending   3
			
 
				+
			
 
				+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
			
 
				+#define HVM_PARAM_HPET_ENABLED 11
			
 
				+
			
 
				+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
			
 
				+#define HVM_PARAM_IDENT_PT     12
			
 
				+
			
 
				+/* ACPI S state: currently support S0 and S3 on x86. */
			
 
				+#define HVM_PARAM_ACPI_S_STATE 14
			
 
				+
			
 
				+/* TSS used on Intel when CR0.PE=0. */
			
 
				+#define HVM_PARAM_VM86_TSS     15
			
 
				+
			
 
				+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
			
 
				+#define HVM_PARAM_VPT_ALIGN    16
			
 
				+
			
 
				+/* Console debug shared memory ring and event channel */
			
 
				+#define HVM_PARAM_CONSOLE_PFN    17
			
 
				+#define HVM_PARAM_CONSOLE_EVTCHN 18
			
 
				+
			
 
				+/*
			
 
				+ * Select location of ACPI PM1a and TMR control blocks. Currently two locations
			
 
				+ * are supported, specified by version 0 or 1 in this parameter:
			
 
				+ *   - 0: default, use the old addresses
			
 
				+ *        PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48
			
 
				+ *   - 1: use the new default qemu addresses
			
 
				+ *        PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008
			
 
				+ * You can find these address definitions in <hvm/ioreq.h>
			
 
				+ */
			
 
				+#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19
			
 
				+
			
 
				+/* Params for the mem event rings */
			
 
				+#define HVM_PARAM_PAGING_RING_PFN   27
			
 
				+#define HVM_PARAM_MONITOR_RING_PFN  28
			
 
				+#define HVM_PARAM_SHARING_RING_PFN  29
			
 
				+
			
 
				+/* SHUTDOWN_* action in case of a triple fault */
			
 
				+#define HVM_PARAM_TRIPLE_FAULT_REASON 31
			
 
				+
			
 
				+#define HVM_PARAM_IOREQ_SERVER_PFN 32
			
 
				+#define HVM_PARAM_NR_IOREQ_SERVER_PAGES 33
			
 
				+
			
 
				+/* Location of the VM Generation ID in guest physical address space. */
			
 
				+#define HVM_PARAM_VM_GENERATION_ID_ADDR 34
			
 
				+
			
 
				+/*
			
 
				+ * Set mode for altp2m:
			
 
				+ *  disabled: don't activate altp2m (default)
			
 
				+ *  mixed: allow access to all altp2m ops for both in-guest and external tools
			
 
				+ *  external: allow access to external privileged tools only
			
 
				+ *  limited: guest only has limited access (i.e. control VMFUNC and #VE)
			
 
				+ *
			
 
				+ * Note that 'mixed' mode has not been evaluated for safety from a
			
 
				+ * security perspective.  Before using this mode in a
			
 
				+ * security-critical environment, each subop should be evaluated for
			
 
				+ * safety, with unsafe subops blacklisted in XSM.
			
 
				+ */
			
 
				+#define HVM_PARAM_ALTP2M       35
			
 
				+#define XEN_ALTP2M_disabled      0
			
 
				+#define XEN_ALTP2M_mixed         1
			
 
				+#define XEN_ALTP2M_external      2
			
 
				+#define XEN_ALTP2M_limited       3
			
 
				+
			
 
				+/*
			
 
				+ * Size of the x87 FPU FIP/FDP registers that the hypervisor needs to
			
 
				+ * save/restore.  This is a workaround for a hardware limitation that
			
 
				+ * does not allow the full FIP/FDP and FCS/FDS to be restored.
			
 
				+ *
			
 
				+ * Valid values are:
			
 
				+ *
			
 
				+ * 8: save/restore 64-bit FIP/FDP and clear FCS/FDS (default if CPU
			
 
				+ *    has FPCSDS feature).
			
 
				+ *
			
 
				+ * 4: save/restore 32-bit FIP/FDP, FCS/FDS, and clear upper 32-bits of
			
 
				+ *    FIP/FDP.
			
 
				+ *
			
 
				+ * 0: allow hypervisor to choose based on the value of FIP/FDP
			
 
				+ *    (default if CPU does not have FPCSDS).
			
 
				+ *
			
 
				+ * If FPCSDS (bit 13 in CPUID leaf 0x7, subleaf 0x0) is set, the CPU
			
 
				+ * never saves FCS/FDS and this parameter should be left at the
			
 
				+ * default of 8.
			
 
				+ */
			
 
				+#define HVM_PARAM_X87_FIP_WIDTH 36
			
 
				+
			
 
				+/*
			
 
				+ * TSS (and its size) used on Intel when CR0.PE=0. The address occupies
			
 
				+ * the low 32 bits, while the size is in the high 32 ones.
			
 
				+ */
			
 
				+#define HVM_PARAM_VM86_TSS_SIZED 37
			
 
				+
			
 
				+/* Enable MCA capabilities. */
			
 
				+#define HVM_PARAM_MCA_CAP 38
			
 
				+#define XEN_HVM_MCA_CAP_LMCE   (xen_mk_ullong(1) << 0)
			
 
				+#define XEN_HVM_MCA_CAP_MASK   XEN_HVM_MCA_CAP_LMCE
			
 
				+
			
 
				+#define HVM_NR_PARAMS 39
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
			
--- a/include/hw/xen/interface/io/blkif.h
+++ b/include/hw/xen/interface/io/blkif.h
@@ -118,7 +118,7 @@
 
				  *
			
 
				  *      The underlying storage is not affected by the direct IO memory
			
 
				  *      lifetime bug.  See:
			
 
				- *        http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
			
 
				+ *        https://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
			
 
				  *
			
 
				  *      Therefore this option gives the backend permission to use
			
 
				  *      O_DIRECT, notwithstanding that bug.
			
@@ -341,7 +341,7 @@
 
				  *      access (even when it should be read-only). If the frontend hits the
			
 
				  *      maximum number of allowed persistently mapped grants, it can fallback
			
 
				  *      to non persistent mode. This will cause a performance degradation,
			
 
				- *      since the backend driver will still try to map those grants
			
 
				+ *      since the backend driver will still try to map those grants
			
 
				  *      persistently. Since the persistent grants protocol is compatible with
			
 
				  *      the previous protocol, a frontend driver can choose to work in
			
 
				  *      persistent mode even when the backend doesn't support it.
			
@@ -710,3 +710,13 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
 
				 #define VDISK_READONLY     0x4
			
 
				 
			
 
				 #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/io/console.h
+++ b/include/hw/xen/interface/io/console.h
@@ -44,3 +44,13 @@ DEFINE_XEN_FLEX_RING(xencons);
 
				 #endif
			
 
				 
			
 
				 #endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/io/fbif.h
+++ b/include/hw/xen/interface/io/fbif.h
@@ -153,4 +153,24 @@ struct xenfb_page
 
				     unsigned long pd[256];
			
 
				 };
			
 
				 
			
 
				+/*
			
 
				+ * Wart: xenkbd needs to know default resolution.  Put it here until a
			
 
				+ * better solution is found, but don't leak it to the backend.
			
 
				+ */
			
 
				+#ifdef __KERNEL__
			
 
				+#define XENFB_WIDTH 800
			
 
				+#define XENFB_HEIGHT 600
			
 
				+#define XENFB_DEPTH 32
			
 
				+#endif
			
 
				+
			
 
				 #endif
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/io/kbdif.h
+++ b/include/hw/xen/interface/io/kbdif.h
@@ -564,3 +564,13 @@ struct xenkbd_page
 
				 };
			
 
				 
			
 
				 #endif /* __XEN_PUBLIC_IO_KBDIF_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/io/netif.h
+++ b/include/hw/xen/interface/io/netif.h
@@ -171,7 +171,7 @@
 
				  * The ability of the backend to use a control ring is advertised by
			
 
				  * setting:
			
 
				  *
			
 
				- * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1"
			
 
				+ * /local/domain/X/backend/vif/<domid>/<vif>/feature-ctrl-ring = "1"
			
 
				  *
			
 
				  * The frontend provides a control ring to the backend by setting:
			
 
				  *
			
@@ -190,6 +190,32 @@
 
				  * order as requests.
			
 
				  */
			
 
				 
			
 
				+/*
			
 
				+ * Link state
			
 
				+ * ==========
			
 
				+ *
			
 
				+ * The backend can advertise its current link (carrier) state to the
			
 
				+ * frontend using the /local/domain/X/backend/vif/<domid>/<vif>/carrier
			
 
				+ * node. If this node is not present, then the frontend should assume that
			
 
				+ * the link is up (for compatibility with backends that do not implement
			
 
				+ * this feature). If this node is present, then a value of "0" should be
			
 
				+ * interpreted by the frontend as the link being down (no carrier) and a
			
 
				+ * value of "1" should be interpreted as the link being up (carrier
			
 
				+ * present).
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * MTU
			
 
				+ * ===
			
 
				+ *
			
 
				+ * The toolstack may set a value of MTU for the frontend by setting the
			
 
				+ * /local/domain/<domid>/device/vif/<vif>/mtu node with the MTU value in
			
 
				+ * octets. If this node is absent the frontend should assume an MTU value
			
 
				+ * of 1500 octets. A frontend is also at liberty to ignore this value so
			
 
				+ * it is only suitable for informing the frontend that a packet payload
			
 
				+ * >1500 octets is permitted.
			
 
				+ */
			
 
				+
			
 
				 /*
			
 
				  * Hash types
			
 
				  * ==========
			
@@ -267,6 +293,62 @@
 
				 
			
 
				 #define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1
			
 
				 
			
 
				+/*
			
 
				+ * This algorithm uses a 'key' as well as the data buffer itself.
			
 
				+ * (Buffer[] and Key[] are treated as shift-registers where the MSB of
			
 
				+ * Buffer/Key[0] is considered 'left-most' and the LSB of Buffer/Key[N-1]
			
 
				+ * is the 'right-most').
			
 
				+ *
			
 
				+ * Value = 0
			
 
				+ * For number of bits in Buffer[]
			
 
				+ *    If (left-most bit of Buffer[] is 1)
			
 
				+ *        Value ^= left-most 32 bits of Key[]
			
 
				+ *    Key[] << 1
			
 
				+ *    Buffer[] << 1
			
 
				+ *
			
 
				+ * The code below is provided for convenience where an operating system
			
 
				+ * does not already provide an implementation.
			
 
				+ */
			
 
				+#ifdef XEN_NETIF_DEFINE_TOEPLITZ
			
 
				+static uint32_t xen_netif_toeplitz_hash(const uint8_t *key,
			
 
				+                                        unsigned int keylen,
			
 
				+                                        const uint8_t *buf,
			
 
				+                                        unsigned int buflen)
			
 
				+{
			
 
				+    unsigned int keyi, bufi;
			
 
				+    uint64_t prefix = 0;
			
 
				+    uint64_t hash = 0;
			
 
				+
			
 
				+    /* Pre-load prefix with the first 8 bytes of the key */
			
 
				+    for (keyi = 0; keyi < 8; keyi++) {
			
 
				+        prefix <<= 8;
			
 
				+        prefix |= (keyi < keylen) ? key[keyi] : 0;
			
 
				+    }
			
 
				+
			
 
				+    for (bufi = 0; bufi < buflen; bufi++) {
			
 
				+        uint8_t byte = buf[bufi];
			
 
				+        unsigned int bit;
			
 
				+
			
 
				+        for (bit = 0; bit < 8; bit++) {
			
 
				+            if (byte & 0x80)
			
 
				+                hash ^= prefix;
			
 
				+            prefix <<= 1;
			
 
				+            byte <<=1;
			
 
				+        }
			
 
				+
			
 
				+        /*
			
 
				+         * 'prefix' has now been left-shifted by 8, so
			
 
				+         * OR in the next byte.
			
 
				+         */
			
 
				+        prefix |= (keyi < keylen) ? key[keyi] : 0;
			
 
				+        keyi++;
			
 
				+    }
			
 
				+
			
 
				+    /* The valid part of the hash is in the upper 32 bits. */
			
 
				+    return hash >> 32;
			
 
				+}
			
 
				+#endif /* XEN_NETIF_DEFINE_TOEPLITZ */
			
 
				+
			
 
				 /*
			
 
				  * Control requests (struct xen_netif_ctrl_request)
			
 
				  * ================================================
			
@@ -1008,3 +1090,13 @@ DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
 
				 #define NETIF_RSP_NULL             1
			
 
				 
			
 
				 #endif
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/io/ring.h
+++ b/include/hw/xen/interface/io/ring.h
@@ -1,6 +1,6 @@
 
				 /******************************************************************************
			
 
				  * ring.h
			
 
				- * 
			
 
				+ *
			
 
				  * Shared producer-consumer ring macros.
			
 
				  *
			
 
				  * Permission is hereby granted, free of charge, to any person obtaining a copy
			
@@ -33,13 +33,6 @@
 
				  * - standard integers types (uint8_t, uint16_t, etc)
			
 
				  * They are provided by stdint.h of the standard headers.
			
 
				  *
			
 
				- * Before using the different macros, you need to provide the following
			
 
				- * macros:
			
 
				- * - xen_mb()  a memory barrier
			
 
				- * - xen_rmb() a read memory barrier
			
 
				- * - xen_wmb() a write memory barrier
			
 
				- * Example of those can be found in xenctrl.h.
			
 
				- *
			
 
				  * In addition, if you intend to use the FLEX macros, you also need to
			
 
				  * provide the following, before invoking the FLEX macros:
			
 
				  * - size_t
			
@@ -49,6 +42,14 @@
 
				  * and grant_table.h from the Xen public headers.
			
 
				  */
			
 
				 
			
 
				+#include "../xen-compat.h"
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00030208
			
 
				+#define xen_mb()  mb()
			
 
				+#define xen_rmb() rmb()
			
 
				+#define xen_wmb() wmb()
			
 
				+#endif
			
 
				+
			
 
				 typedef unsigned int RING_IDX;
			
 
				 
			
 
				 /* Round a 32-bit unsigned constant down to the nearest power of two. */
			
@@ -61,12 +62,12 @@ typedef unsigned int RING_IDX;
 
				 /*
			
 
				  * Calculate size of a shared ring, given the total available space for the
			
 
				  * ring and indexes (_sz), and the name tag of the request/response structure.
			
 
				- * A ring contains as many entries as will fit, rounded down to the nearest 
			
 
				+ * A ring contains as many entries as will fit, rounded down to the nearest
			
 
				  * power of two (so we can mask with (size-1) to loop around).
			
 
				  */
			
 
				 #define __CONST_RING_SIZE(_s, _sz) \
			
 
				     (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
			
 
				-            sizeof_field(struct _s##_sring, ring[0])))
			
 
				+	    sizeof(((struct _s##_sring *)0)->ring[0])))
			
 
				 /*
			
 
				  * The same for passing in an actual pointer instead of a name tag.
			
 
				  */
			
@@ -75,7 +76,7 @@ typedef unsigned int RING_IDX;
 
				 
			
 
				 /*
			
 
				  * Macros to make the correct C datatypes for a new kind of ring.
			
 
				- * 
			
 
				+ *
			
 
				  * To make a new ring datatype, you need to have two message structures,
			
 
				  * let's say request_t, and response_t already defined.
			
 
				  *
			
@@ -85,7 +86,7 @@ typedef unsigned int RING_IDX;
 
				  *
			
 
				  * These expand out to give you a set of types, as you can see below.
			
 
				  * The most important of these are:
			
 
				- * 
			
 
				+ *
			
 
				  *     mytag_sring_t      - The shared ring.
			
 
				  *     mytag_front_ring_t - The 'front' half of the ring.
			
 
				  *     mytag_back_ring_t  - The 'back' half of the ring.
			
@@ -153,15 +154,15 @@ typedef struct __name##_back_ring __name##_back_ring_t
 
				 
			
 
				 /*
			
 
				  * Macros for manipulating rings.
			
 
				- * 
			
 
				- * FRONT_RING_whatever works on the "front end" of a ring: here 
			
 
				+ *
			
 
				+ * FRONT_RING_whatever works on the "front end" of a ring: here
			
 
				  * requests are pushed on to the ring and responses taken off it.
			
 
				- * 
			
 
				- * BACK_RING_whatever works on the "back end" of a ring: here 
			
 
				+ *
			
 
				+ * BACK_RING_whatever works on the "back end" of a ring: here
			
 
				  * requests are taken off the ring and responses put on.
			
 
				- * 
			
 
				- * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. 
			
 
				- * This is OK in 1-for-1 request-response situations where the 
			
 
				+ *
			
 
				+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
			
 
				+ * This is OK in 1-for-1 request-response situations where the
			
 
				  * requestor (front end) never has more than RING_SIZE()-1
			
 
				  * outstanding requests.
			
 
				  */
			
@@ -174,20 +175,24 @@ typedef struct __name##_back_ring __name##_back_ring_t
 
				     (void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                  \
			
 
				 } while(0)
			
 
				 
			
 
				-#define FRONT_RING_INIT(_r, _s, __size) do {                            \
			
 
				-    (_r)->req_prod_pvt = 0;                                             \
			
 
				-    (_r)->rsp_cons = 0;                                                 \
			
 
				+#define FRONT_RING_ATTACH(_r, _s, _i, __size) do {                      \
			
 
				+    (_r)->req_prod_pvt = (_i);                                          \
			
 
				+    (_r)->rsp_cons = (_i);                                              \
			
 
				     (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
			
 
				     (_r)->sring = (_s);                                                 \
			
 
				 } while (0)
			
 
				 
			
 
				-#define BACK_RING_INIT(_r, _s, __size) do {                             \
			
 
				-    (_r)->rsp_prod_pvt = 0;                                             \
			
 
				-    (_r)->req_cons = 0;                                                 \
			
 
				+#define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size)
			
 
				+
			
 
				+#define BACK_RING_ATTACH(_r, _s, _i, __size) do {                       \
			
 
				+    (_r)->rsp_prod_pvt = (_i);                                          \
			
 
				+    (_r)->req_cons = (_i);                                              \
			
 
				     (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
			
 
				     (_r)->sring = (_s);                                                 \
			
 
				 } while (0)
			
 
				 
			
 
				+#define BACK_RING_INIT(_r, _s, __size) BACK_RING_ATTACH(_r, _s, 0, __size)
			
 
				+
			
 
				 /* How big is this ring? */
			
 
				 #define RING_SIZE(_r)                                                   \
			
 
				     ((_r)->nr_ents)
			
@@ -206,33 +211,45 @@ typedef struct __name##_back_ring __name##_back_ring_t
 
				 #define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
			
 
				     ((_r)->sring->rsp_prod - (_r)->rsp_cons)
			
 
				 
			
 
				+#ifdef __GNUC__
			
 
				 #define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                             \
			
 
				     unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
			
 
				     unsigned int rsp = RING_SIZE(_r) -                                  \
			
 
				         ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
			
 
				     req < rsp ? req : rsp;                                              \
			
 
				 })
			
 
				+#else
			
 
				+/* Same as above, but without the nice GCC ({ ... }) syntax. */
			
 
				+#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
			
 
				+    ((((_r)->sring->req_prod - (_r)->req_cons) <                        \
			
 
				+      (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?        \
			
 
				+     ((_r)->sring->req_prod - (_r)->req_cons) :                         \
			
 
				+     (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
			
 
				+#endif
			
 
				 
			
 
				 /* Direct access to individual ring elements, by index. */
			
 
				 #define RING_GET_REQUEST(_r, _idx)                                      \
			
 
				     (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
			
 
				 
			
 
				+#define RING_GET_RESPONSE(_r, _idx)                                     \
			
 
				+    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
			
 
				+
			
 
				 /*
			
 
				- * Get a local copy of a request.
			
 
				+ * Get a local copy of a request/response.
			
 
				  *
			
 
				- * Use this in preference to RING_GET_REQUEST() so all processing is
			
 
				+ * Use this in preference to RING_GET_{REQUEST,RESPONSE}() so all processing is
			
 
				  * done on a local copy that cannot be modified by the other end.
			
 
				  *
			
 
				  * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
			
 
				- * to be ineffective where _req is a struct which consists of only bitfields.
			
 
				+ * to be ineffective where dest is a struct which consists of only bitfields.
			
 
				  */
			
 
				-#define RING_COPY_REQUEST(_r, _idx, _req) do {				\
			
 
				-        /* Use volatile to force the copy into _req. */			\
			
 
				-        *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx);	\
			
 
				+#define RING_COPY_(type, r, idx, dest) do {				\
			
 
				+	/* Use volatile to force the copy into dest. */			\
			
 
				+	*(dest) = *(volatile __typeof__(dest))RING_GET_##type(r, idx);	\
			
 
				 } while (0)
			
 
				 
			
 
				-#define RING_GET_RESPONSE(_r, _idx)                                     \
			
 
				-    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
			
 
				+#define RING_COPY_REQUEST(r, idx, req)  RING_COPY_(REQUEST, r, idx, req)
			
 
				+#define RING_COPY_RESPONSE(r, idx, rsp) RING_COPY_(RESPONSE, r, idx, rsp)
			
 
				 
			
 
				 /* Loop termination condition: Would the specified index overflow the ring? */
			
 
				 #define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
			
@@ -242,6 +259,10 @@ typedef struct __name##_back_ring __name##_back_ring_t
 
				 #define RING_REQUEST_PROD_OVERFLOW(_r, _prod)                           \
			
 
				     (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
			
 
				 
			
 
				+/* Ill-behaved backend determination: Can there be this many responses? */
			
 
				+#define RING_RESPONSE_PROD_OVERFLOW(_r, _prod)                          \
			
 
				+    (((_prod) - (_r)->rsp_cons) > RING_SIZE(_r))
			
 
				+
			
 
				 #define RING_PUSH_REQUESTS(_r) do {                                     \
			
 
				     xen_wmb(); /* back sees requests /before/ updated producer index */ \
			
 
				     (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
			
@@ -254,26 +275,26 @@ typedef struct __name##_back_ring __name##_back_ring_t
 
				 
			
 
				 /*
			
 
				  * Notification hold-off (req_event and rsp_event):
			
 
				- * 
			
 
				+ *
			
 
				  * When queueing requests or responses on a shared ring, it may not always be
			
 
				  * necessary to notify the remote end. For example, if requests are in flight
			
 
				  * in a backend, the front may be able to queue further requests without
			
 
				  * notifying the back (if the back checks for new requests when it queues
			
 
				  * responses).
			
 
				- * 
			
 
				+ *
			
 
				  * When enqueuing requests or responses:
			
 
				- * 
			
 
				+ *
			
 
				  *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
			
 
				  *  is a boolean return value. True indicates that the receiver requires an
			
 
				  *  asynchronous notification.
			
 
				- * 
			
 
				+ *
			
 
				  * After dequeuing requests or responses (before sleeping the connection):
			
 
				- * 
			
 
				+ *
			
 
				  *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
			
 
				  *  The second argument is a boolean return value. True indicates that there
			
 
				  *  are pending messages on the ring (i.e., the connection should not be put
			
 
				  *  to sleep).
			
 
				- * 
			
 
				+ *
			
 
				  *  These macros will set the req_event/rsp_event field to trigger a
			
 
				  *  notification on the very next message that is enqueued. If you want to
			
 
				  *  create batches of work (i.e., only receive a notification after several
			
--- a/include/hw/xen/interface/io/usbif.h
+++ b/include/hw/xen/interface/io/usbif.h
@@ -32,6 +32,34 @@
 
				 #include "../grant_table.h"
			
 
				 
			
 
				 /*
			
 
				+ * Detailed Interface Description
			
 
				+ * ==============================
			
 
				+ * The pvUSB interface is using a split driver design: a frontend driver in
			
 
				+ * the guest and a backend driver in a driver domain (normally dom0) having
			
 
				+ * access to the physical USB device(s) being passed to the guest.
			
 
				+ *
			
 
				+ * The frontend and backend drivers use XenStore to initiate the connection
			
 
				+ * between them, the I/O activity is handled via two shared ring pages and an
			
 
				+ * event channel. As the interface between frontend and backend is at the USB
			
 
				+ * host connector level, multiple (up to 31) physical USB devices can be
			
 
				+ * handled by a single connection.
			
 
				+ *
			
 
				+ * The Xen pvUSB device name is "qusb", so the frontend's XenStore entries are
			
 
				+ * to be found under "device/qusb", while the backend's XenStore entries are
			
 
				+ * under "backend/<guest-dom-id>/qusb".
			
 
				+ *
			
 
				+ * When a new pvUSB connection is established, the frontend needs to set up the
			
 
				+ * two shared ring pages for communication and the event channel. The ring
			
 
				+ * pages need to be made available to the backend via the grant table
			
 
				+ * interface.
			
 
				+ *
			
 
				+ * One of the shared ring pages is used by the backend to inform the frontend
			
 
				+ * about USB device plug events (device to be added or removed). This is the
			
 
				+ * "conn-ring".
			
 
				+ *
			
 
				+ * The other ring page is used for USB I/O communication (requests and
			
 
				+ * responses). This is the "urb-ring".
			
 
				+ *
			
 
				  * Feature and Parameter Negotiation
			
 
				  * =================================
			
 
				  * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to
			
@@ -99,130 +127,273 @@
 
				  *      The machine ABI rules governing the format of all ring request and
			
 
				  *      response structures.
			
 
				  *
			
 
				+ * Protocol Description
			
 
				+ * ====================
			
 
				+ *
			
 
				+ *-------------------------- USB device plug events --------------------------
			
 
				+ *
			
 
				+ * USB device plug events are sent via the "conn-ring" shared page. As only
			
 
				+ * events are being sent, the respective requests from the frontend to the
			
 
				+ * backend are just dummy ones.
			
 
				+ * The events sent to the frontend have the following layout:
			
 
				+ *         0                1                 2               3        octet
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |               id                |    portnum     |     speed      | 4
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ *   id - uint16_t, event id (taken from the actual frontend dummy request)
			
 
				+ *   portnum - uint8_t, port number (1 ... 31)
			
 
				+ *   speed - uint8_t, device USBIF_SPEED_*, USBIF_SPEED_NONE == unplug
			
 
				+ *
			
 
				+ * The dummy request:
			
 
				+ *         0                1        octet
			
 
				+ * +----------------+----------------+
			
 
				+ * |               id                | 2
			
 
				+ * +----------------+----------------+
			
 
				+ *   id - uint16_t, guest supplied value (no need for being unique)
			
 
				+ *
			
 
				+ *-------------------------- USB I/O request ---------------------------------
			
 
				+ *
			
 
				+ * A single USB I/O request on the "urb-ring" has the following layout:
			
 
				+ *         0                1                 2               3        octet
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |               id                |         nr_buffer_segs          | 4
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                               pipe                                | 8
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |         transfer_flags          |          buffer_length          | 12
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                       request type specific                       | 16
			
 
				+ * |                               data                                | 20
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                              seg[0]                               | 24
			
 
				+ * |                               data                                | 28
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |             seg[USBIF_MAX_SEGMENTS_PER_REQUEST - 1]               | 144
			
 
				+ * |                               data                                | 148
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * Bit field bit number 0 is always least significant bit, undefined bits must
			
 
				+ * be zero.
			
 
				+ *   id - uint16_t, guest supplied value
			
 
				+ *   nr_buffer_segs - uint16_t, number of segment entries in seg[] array
			
 
				+ *   pipe - uint32_t, bit field carrying several pieces of information:
			
 
				+ *     bits 0-4: port to send the request to
			
 
				+ *     bit 5: unlink request with specified id (cancel I/O) if set (see below)
			
 
				+ *     bit 7: direction (1 = read from device)
			
 
				+ *     bits 8-14: device number on port
			
 
				+ *     bits 15-18: endpoint of device
			
 
				+ *     bits 30-31: request type: 00 = isochronous, 01 = interrupt,
			
 
				+ *                               10 = control, 11 = bulk
			
 
				+ *   transfer_flags - uint16_t, bit field with processing flags:
			
 
				+ *     bit 0: less data than specified allowed
			
 
				+ *   buffer_length - uint16_t, total length of data
			
 
				+ *   request type specific data - 8 bytes, see below
			
 
				+ *   seg[] - array with 8 byte elements, see below
			
 
				+ *
			
 
				+ * Request type specific data for isochronous request:
			
 
				+ *         0                1                 2               3        octet
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |            interval             |           start_frame           | 4
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |       number_of_packets         |       nr_frame_desc_segs        | 8
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ *   interval - uint16_t, time interval in msecs between frames
			
 
				+ *   start_frame - uint16_t, start frame number
			
 
				+ *   number_of_packets - uint16_t, number of packets to transfer
			
 
				+ *   nr_frame_desc_segs - uint16_t, number of seg[] frame descriptor elements
			
 
				+ *
			
 
				+ * Request type specific data for interrupt request:
			
 
				+ *         0                1                 2               3        octet
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |            interval             |                0                | 4
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                                 0                                 | 8
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ *   interval - uint16_t, time in msecs until interruption
			
 
				+ *
			
 
				+ * Request type specific data for control request:
			
 
				+ *         0                1                 2               3        octet
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                      data of setup packet                         | 4
			
 
				+ * |                                                                   | 8
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ *
			
 
				+ * Request type specific data for bulk request:
			
 
				+ *         0                1                 2               3        octet
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                                 0                                 | 4
			
 
				+ * |                                 0                                 | 8
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ *
			
 
				+ * Request type specific data for unlink request:
			
 
				+ *         0                1                 2               3        octet
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |           unlink_id             |                0                | 4
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                                 0                                 | 8
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ *   unlink_id - uint16_t, request id of request to terminate
			
 
				+ *
			
 
				+ * seg[] array element layout:
			
 
				+ *         0                1                 2               3        octet
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                               gref                                | 4
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |             offset              |             length              | 8
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ *   gref - uint32_t, grant reference of buffer page
			
 
				+ *   offset - uint16_t, offset of buffer start in page
			
 
				+ *   length - uint16_t, length of buffer in page
			
 
				+ *
			
 
				+ *-------------------------- USB I/O response --------------------------------
			
 
				+ *
			
 
				+ *         0                1                 2               3        octet
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |               id                |          start_frame            | 4
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                              status                               | 8
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                          actual_length                            | 12
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ * |                           error_count                             | 16
			
 
				+ * +----------------+----------------+----------------+----------------+
			
 
				+ *   id - uint16_t, id of the request this response belongs to
			
 
				+ *   start_frame - uint16_t, start_frame of this response (iso requests only)
			
 
				+ *   status - int32_t, USBIF_STATUS_* (non-iso requests)
			
 
				+ *   actual_length - uint32_t, actual size of data transferred
			
 
				+ *   error_count - uint32_t, number of errors (iso requests)
			
 
				  */
			
 
				 
			
 
				 enum usb_spec_version {
			
 
				-	USB_VER_UNKNOWN = 0,
			
 
				-	USB_VER_USB11,
			
 
				-	USB_VER_USB20,
			
 
				-	USB_VER_USB30,	/* not supported yet */
			
 
				+    USB_VER_UNKNOWN = 0,
			
 
				+    USB_VER_USB11,
			
 
				+    USB_VER_USB20,
			
 
				+    USB_VER_USB30,    /* not supported yet */
			
 
				 };
			
 
				 
			
 
				 /*
			
 
				  *  USB pipe in usbif_request
			
 
				  *
			
 
				- *  - port number:	bits 0-4
			
 
				- *				(USB_MAXCHILDREN is 31)
			
 
				+ *  - port number:      bits 0-4
			
 
				+ *                              (USB_MAXCHILDREN is 31)
			
 
				  *
			
 
				- *  - operation flag:	bit 5
			
 
				- *				(0 = submit urb,
			
 
				- *				 1 = unlink urb)
			
 
				+ *  - operation flag:   bit 5
			
 
				+ *                              (0 = submit urb,
			
 
				+ *                               1 = unlink urb)
			
 
				  *
			
 
				- *  - direction:		bit 7
			
 
				- *				(0 = Host-to-Device [Out]
			
 
				- *				 1 = Device-to-Host [In])
			
 
				+ *  - direction:        bit 7
			
 
				+ *                              (0 = Host-to-Device [Out]
			
 
				+ *                               1 = Device-to-Host [In])
			
 
				  *
			
 
				- *  - device address:	bits 8-14
			
 
				+ *  - device address:   bits 8-14
			
 
				  *
			
 
				- *  - endpoint:		bits 15-18
			
 
				+ *  - endpoint:         bits 15-18
			
 
				  *
			
 
				- *  - pipe type:	bits 30-31
			
 
				- *				(00 = isochronous, 01 = interrupt,
			
 
				- *				 10 = control, 11 = bulk)
			
 
				+ *  - pipe type:        bits 30-31
			
 
				+ *                              (00 = isochronous, 01 = interrupt,
			
 
				+ *                               10 = control, 11 = bulk)
			
 
				  */
			
 
				 
			
 
				-#define USBIF_PIPE_PORT_MASK	0x0000001f
			
 
				-#define USBIF_PIPE_UNLINK	0x00000020
			
 
				-#define USBIF_PIPE_DIR		0x00000080
			
 
				-#define USBIF_PIPE_DEV_MASK	0x0000007f
			
 
				-#define USBIF_PIPE_DEV_SHIFT	8
			
 
				-#define USBIF_PIPE_EP_MASK	0x0000000f
			
 
				-#define USBIF_PIPE_EP_SHIFT	15
			
 
				-#define USBIF_PIPE_TYPE_MASK	0x00000003
			
 
				-#define USBIF_PIPE_TYPE_SHIFT	30
			
 
				-#define USBIF_PIPE_TYPE_ISOC	0
			
 
				-#define USBIF_PIPE_TYPE_INT	1
			
 
				-#define USBIF_PIPE_TYPE_CTRL	2
			
 
				-#define USBIF_PIPE_TYPE_BULK	3
			
 
				-
			
 
				-#define usbif_pipeportnum(pipe)			((pipe) & USBIF_PIPE_PORT_MASK)
			
 
				-#define usbif_setportnum_pipe(pipe, portnum)	((pipe) | (portnum))
			
 
				-
			
 
				-#define usbif_pipeunlink(pipe)			((pipe) & USBIF_PIPE_UNLINK)
			
 
				-#define usbif_pipesubmit(pipe)			(!usbif_pipeunlink(pipe))
			
 
				-#define usbif_setunlink_pipe(pipe)		((pipe) | USBIF_PIPE_UNLINK)
			
 
				-
			
 
				-#define usbif_pipein(pipe)			((pipe) & USBIF_PIPE_DIR)
			
 
				-#define usbif_pipeout(pipe)			(!usbif_pipein(pipe))
			
 
				-
			
 
				-#define usbif_pipedevice(pipe)			\
			
 
				-		(((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK)
			
 
				-
			
 
				-#define usbif_pipeendpoint(pipe)		\
			
 
				-		(((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK)
			
 
				-
			
 
				-#define usbif_pipetype(pipe)			\
			
 
				-		(((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK)
			
 
				-#define usbif_pipeisoc(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC)
			
 
				-#define usbif_pipeint(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT)
			
 
				-#define usbif_pipectrl(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL)
			
 
				-#define usbif_pipebulk(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK)
			
 
				+#define USBIF_PIPE_PORT_MASK    0x0000001f
			
 
				+#define USBIF_PIPE_UNLINK       0x00000020
			
 
				+#define USBIF_PIPE_DIR          0x00000080
			
 
				+#define USBIF_PIPE_DEV_MASK     0x0000007f
			
 
				+#define USBIF_PIPE_DEV_SHIFT    8
			
 
				+#define USBIF_PIPE_EP_MASK      0x0000000f
			
 
				+#define USBIF_PIPE_EP_SHIFT     15
			
 
				+#define USBIF_PIPE_TYPE_MASK    0x00000003
			
 
				+#define USBIF_PIPE_TYPE_SHIFT   30
			
 
				+#define USBIF_PIPE_TYPE_ISOC    0
			
 
				+#define USBIF_PIPE_TYPE_INT     1
			
 
				+#define USBIF_PIPE_TYPE_CTRL    2
			
 
				+#define USBIF_PIPE_TYPE_BULK    3
			
 
				+
			
 
				+#define usbif_pipeportnum(pipe)                 ((pipe) & USBIF_PIPE_PORT_MASK)
			
 
				+#define usbif_setportnum_pipe(pipe, portnum)    ((pipe) | (portnum))
			
 
				+
			
 
				+#define usbif_pipeunlink(pipe)                  ((pipe) & USBIF_PIPE_UNLINK)
			
 
				+#define usbif_pipesubmit(pipe)                  (!usbif_pipeunlink(pipe))
			
 
				+#define usbif_setunlink_pipe(pipe)              ((pipe) | USBIF_PIPE_UNLINK)
			
 
				+
			
 
				+#define usbif_pipein(pipe)                      ((pipe) & USBIF_PIPE_DIR)
			
 
				+#define usbif_pipeout(pipe)                     (!usbif_pipein(pipe))
			
 
				+
			
 
				+#define usbif_pipedevice(pipe)                  \
			
 
				+        (((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK)
			
 
				+
			
 
				+#define usbif_pipeendpoint(pipe)                \
			
 
				+        (((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK)
			
 
				+
			
 
				+#define usbif_pipetype(pipe)                    \
			
 
				+        (((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK)
			
 
				+#define usbif_pipeisoc(pipe)    (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC)
			
 
				+#define usbif_pipeint(pipe)     (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT)
			
 
				+#define usbif_pipectrl(pipe)    (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL)
			
 
				+#define usbif_pipebulk(pipe)    (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK)
			
 
				 
			
 
				 #define USBIF_MAX_SEGMENTS_PER_REQUEST (16)
			
 
				-#define USBIF_MAX_PORTNR	31
			
 
				-#define USBIF_RING_SIZE	4096
			
 
				+#define USBIF_MAX_PORTNR        31
			
 
				+#define USBIF_RING_SIZE         4096
			
 
				 
			
 
				 /*
			
 
				  * RING for transferring urbs.
			
 
				  */
			
 
				 struct usbif_request_segment {
			
 
				-	grant_ref_t gref;
			
 
				-	uint16_t offset;
			
 
				-	uint16_t length;
			
 
				+    grant_ref_t gref;
			
 
				+    uint16_t offset;
			
 
				+    uint16_t length;
			
 
				 };
			
 
				 
			
 
				 struct usbif_urb_request {
			
 
				-	uint16_t id; /* request id */
			
 
				-	uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */
			
 
				-
			
 
				-	/* basic urb parameter */
			
 
				-	uint32_t pipe;
			
 
				-	uint16_t transfer_flags;
			
 
				-#define USBIF_SHORT_NOT_OK	0x0001
			
 
				-	uint16_t buffer_length;
			
 
				-	union {
			
 
				-		uint8_t ctrl[8]; /* setup_packet (Ctrl) */
			
 
				-
			
 
				-		struct {
			
 
				-			uint16_t interval; /* maximum (1024*8) in usb core */
			
 
				-			uint16_t start_frame; /* start frame */
			
 
				-			uint16_t number_of_packets; /* number of ISO packet */
			
 
				-			uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */
			
 
				-		} isoc;
			
 
				-
			
 
				-		struct {
			
 
				-			uint16_t interval; /* maximum (1024*8) in usb core */
			
 
				-			uint16_t pad[3];
			
 
				-		} intr;
			
 
				-
			
 
				-		struct {
			
 
				-			uint16_t unlink_id; /* unlink request id */
			
 
				-			uint16_t pad[3];
			
 
				-		} unlink;
			
 
				-
			
 
				-	} u;
			
 
				-
			
 
				-	/* urb data segments */
			
 
				-	struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST];
			
 
				+    uint16_t id;                  /* request id */
			
 
				+    uint16_t nr_buffer_segs;      /* number of urb->transfer_buffer segments */
			
 
				+
			
 
				+    /* basic urb parameter */
			
 
				+    uint32_t pipe;
			
 
				+    uint16_t transfer_flags;
			
 
				+#define USBIF_SHORT_NOT_OK      0x0001
			
 
				+    uint16_t buffer_length;
			
 
				+    union {
			
 
				+        uint8_t ctrl[8];                 /* setup_packet (Ctrl) */
			
 
				+
			
 
				+        struct {
			
 
				+            uint16_t interval;           /* maximum (1024*8) in usb core */
			
 
				+            uint16_t start_frame;        /* start frame */
			
 
				+            uint16_t number_of_packets;  /* number of ISO packets */
			
 
				+            uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */
			
 
				+        } isoc;
			
 
				+
			
 
				+        struct {
			
 
				+            uint16_t interval;           /* maximum (1024*8) in usb core */
			
 
				+            uint16_t pad[3];
			
 
				+        } intr;
			
 
				+
			
 
				+        struct {
			
 
				+            uint16_t unlink_id;          /* unlink request id */
			
 
				+            uint16_t pad[3];
			
 
				+        } unlink;
			
 
				+
			
 
				+    } u;
			
 
				+
			
 
				+    /* urb data segments */
			
 
				+    struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST];
			
 
				 };
			
 
				 typedef struct usbif_urb_request usbif_urb_request_t;
			
 
				 
			
 
				 struct usbif_urb_response {
			
 
				-	uint16_t id; /* request id */
			
 
				-	uint16_t start_frame;  /* start frame (ISO) */
			
 
				-	int32_t status; /* status (non-ISO) */
			
 
				-	int32_t actual_length; /* actual transfer length */
			
 
				-	int32_t error_count; /* number of ISO errors */
			
 
				+    uint16_t id;           /* request id */
			
 
				+    uint16_t start_frame;  /* start frame (ISO) */
			
 
				+    int32_t status;        /* status (non-ISO) */
			
 
				+#define USBIF_STATUS_OK         0
			
 
				+#define USBIF_STATUS_NODEV      (-19)
			
 
				+#define USBIF_STATUS_INVAL      (-22)
			
 
				+#define USBIF_STATUS_STALL      (-32)
			
 
				+#define USBIF_STATUS_IOERROR    (-71)
			
 
				+#define USBIF_STATUS_BABBLE     (-75)
			
 
				+#define USBIF_STATUS_SHUTDOWN   (-108)
			
 
				+    int32_t actual_length; /* actual transfer length */
			
 
				+    int32_t error_count;   /* number of ISO errors */
			
 
				 };
			
 
				 typedef struct usbif_urb_response usbif_urb_response_t;
			
 
				 
			
@@ -233,18 +404,18 @@ DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response
 
				  * RING for notifying connect/disconnect events to frontend
			
 
				  */
			
 
				 struct usbif_conn_request {
			
 
				-	uint16_t id;
			
 
				+    uint16_t id;
			
 
				 };
			
 
				 typedef struct usbif_conn_request usbif_conn_request_t;
			
 
				 
			
 
				 struct usbif_conn_response {
			
 
				-	uint16_t id; /* request id */
			
 
				-	uint8_t portnum; /* port number */
			
 
				-	uint8_t speed; /* usb_device_speed */
			
 
				-#define USBIF_SPEED_NONE	0
			
 
				-#define USBIF_SPEED_LOW		1
			
 
				-#define USBIF_SPEED_FULL	2
			
 
				-#define USBIF_SPEED_HIGH	3
			
 
				+    uint16_t id;           /* request id */
			
 
				+    uint8_t portnum;       /* port number */
			
 
				+    uint8_t speed;         /* usb_device_speed */
			
 
				+#define USBIF_SPEED_NONE        0
			
 
				+#define USBIF_SPEED_LOW         1
			
 
				+#define USBIF_SPEED_FULL        2
			
 
				+#define USBIF_SPEED_HIGH        3
			
 
				 };
			
 
				 typedef struct usbif_conn_response usbif_conn_response_t;
			
 
				 
			
--- a/include/hw/xen/interface/io/xenbus.h
+++ b/include/hw/xen/interface/io/xenbus.h
@@ -68,3 +68,13 @@ enum xenbus_state {
 
				 typedef enum xenbus_state XenbusState;
			
 
				 
			
 
				 #endif /* _XEN_PUBLIC_IO_XENBUS_H */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/io/xs_wire.h
+++ b/include/hw/xen/interface/io/xs_wire.h
@@ -0,0 +1,153 @@
 
				+/*
			
 
				+ * Details of the "wire" protocol between Xen Store Daemon and client
			
 
				+ * library or guest kernel.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (C) 2005 Rusty Russell IBM Corporation
			
 
				+ */
			
 
				+
			
 
				+#ifndef _XS_WIRE_H
			
 
				+#define _XS_WIRE_H
			
 
				+
			
 
				+enum xsd_sockmsg_type
			
 
				+{
			
 
				+    XS_CONTROL,
			
 
				+#define XS_DEBUG XS_CONTROL
			
 
				+    XS_DIRECTORY,
			
 
				+    XS_READ,
			
 
				+    XS_GET_PERMS,
			
 
				+    XS_WATCH,
			
 
				+    XS_UNWATCH,
			
 
				+    XS_TRANSACTION_START,
			
 
				+    XS_TRANSACTION_END,
			
 
				+    XS_INTRODUCE,
			
 
				+    XS_RELEASE,
			
 
				+    XS_GET_DOMAIN_PATH,
			
 
				+    XS_WRITE,
			
 
				+    XS_MKDIR,
			
 
				+    XS_RM,
			
 
				+    XS_SET_PERMS,
			
 
				+    XS_WATCH_EVENT,
			
 
				+    XS_ERROR,
			
 
				+    XS_IS_DOMAIN_INTRODUCED,
			
 
				+    XS_RESUME,
			
 
				+    XS_SET_TARGET,
			
 
				+    /* XS_RESTRICT has been removed */
			
 
				+    XS_RESET_WATCHES = XS_SET_TARGET + 2,
			
 
				+    XS_DIRECTORY_PART,
			
 
				+
			
 
				+    XS_TYPE_COUNT,      /* Number of valid types. */
			
 
				+
			
 
				+    XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
			
 
				+};
			
 
				+
			
 
				+#define XS_WRITE_NONE "NONE"
			
 
				+#define XS_WRITE_CREATE "CREATE"
			
 
				+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
			
 
				+
			
 
				+/* We hand errors as strings, for portability. */
			
 
				+struct xsd_errors
			
 
				+{
			
 
				+    int errnum;
			
 
				+    const char *errstring;
			
 
				+};
			
 
				+#ifdef EINVAL
			
 
				+#define XSD_ERROR(x) { x, #x }
			
 
				+/* LINTED: static unused */
			
 
				+static struct xsd_errors xsd_errors[]
			
 
				+#if defined(__GNUC__)
			
 
				+__attribute__((unused))
			
 
				+#endif
			
 
				+    = {
			
 
				+    XSD_ERROR(EINVAL),
			
 
				+    XSD_ERROR(EACCES),
			
 
				+    XSD_ERROR(EEXIST),
			
 
				+    XSD_ERROR(EISDIR),
			
 
				+    XSD_ERROR(ENOENT),
			
 
				+    XSD_ERROR(ENOMEM),
			
 
				+    XSD_ERROR(ENOSPC),
			
 
				+    XSD_ERROR(EIO),
			
 
				+    XSD_ERROR(ENOTEMPTY),
			
 
				+    XSD_ERROR(ENOSYS),
			
 
				+    XSD_ERROR(EROFS),
			
 
				+    XSD_ERROR(EBUSY),
			
 
				+    XSD_ERROR(EAGAIN),
			
 
				+    XSD_ERROR(EISCONN),
			
 
				+    XSD_ERROR(E2BIG)
			
 
				+};
			
 
				+#endif
			
 
				+
			
 
				+struct xsd_sockmsg
			
 
				+{
			
 
				+    uint32_t type;  /* XS_??? */
			
 
				+    uint32_t req_id;/* Request identifier, echoed in daemon's response.  */
			
 
				+    uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
			
 
				+    uint32_t len;   /* Length of data following this. */
			
 
				+
			
 
				+    /* Generally followed by nul-terminated string(s). */
			
 
				+};
			
 
				+
			
 
				+enum xs_watch_type
			
 
				+{
			
 
				+    XS_WATCH_PATH = 0,
			
 
				+    XS_WATCH_TOKEN
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 150 xenstore_struct XenStore wire protocol.
			
 
				+ *
			
 
				+ * Inter-domain shared memory communications. */
			
 
				+#define XENSTORE_RING_SIZE 1024
			
 
				+typedef uint32_t XENSTORE_RING_IDX;
			
 
				+#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1))
			
 
				+struct xenstore_domain_interface {
			
 
				+    char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
			
 
				+    char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
			
 
				+    XENSTORE_RING_IDX req_cons, req_prod;
			
 
				+    XENSTORE_RING_IDX rsp_cons, rsp_prod;
			
 
				+    uint32_t server_features; /* Bitmap of features supported by the server */
			
 
				+    uint32_t connection;
			
 
				+};
			
 
				+
			
 
				+/* Violating this is very bad.  See docs/misc/xenstore.txt. */
			
 
				+#define XENSTORE_PAYLOAD_MAX 4096
			
 
				+
			
 
				+/* Violating these just gets you an error back */
			
 
				+#define XENSTORE_ABS_PATH_MAX 3072
			
 
				+#define XENSTORE_REL_PATH_MAX 2048
			
 
				+
			
 
				+/* The ability to reconnect a ring */
			
 
				+#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
			
 
				+
			
 
				+/* Valid values for the connection field */
			
 
				+#define XENSTORE_CONNECTED 0 /* the steady-state */
			
 
				+#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
			
 
				+
			
 
				+#endif /* _XS_WIRE_H */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/memory.h
+++ b/include/hw/xen/interface/memory.h
@@ -0,0 +1,754 @@
 
				+/******************************************************************************
			
 
				+ * memory.h
			
 
				+ *
			
 
				+ * Memory reservation and information.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_MEMORY_H__
			
 
				+#define __XEN_PUBLIC_MEMORY_H__
			
 
				+
			
 
				+#include "xen.h"
			
 
				+#include "physdev.h"
			
 
				+
			
 
				+/*
			
 
				+ * Increase or decrease the specified domain's memory reservation. Returns the
			
 
				+ * number of extents successfully allocated or freed.
			
 
				+ * arg == addr of struct xen_memory_reservation.
			
 
				+ */
			
 
				+#define XENMEM_increase_reservation 0
			
 
				+#define XENMEM_decrease_reservation 1
			
 
				+#define XENMEM_populate_physmap     6
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
			
 
				+/*
			
 
				+ * Maximum # bits addressable by the user of the allocated region (e.g., I/O
			
 
				+ * devices often have a 32-bit limitation even in 64-bit systems). If zero
			
 
				+ * then the user has no addressing restriction. This field is not used by
			
 
				+ * XENMEM_decrease_reservation.
			
 
				+ */
			
 
				+#define XENMEMF_address_bits(x)     (x)
			
 
				+#define XENMEMF_get_address_bits(x) ((x) & 0xffu)
			
 
				+/* NUMA node to allocate from. */
			
 
				+#define XENMEMF_node(x)     (((x) + 1) << 8)
			
 
				+#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
			
 
				+/* Flag to populate physmap with populate-on-demand entries */
			
 
				+#define XENMEMF_populate_on_demand (1<<16)
			
 
				+/* Flag to request allocation only from the node specified */
			
 
				+#define XENMEMF_exact_node_request  (1<<17)
			
 
				+#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
			
 
				+/* Flag to indicate the node specified is virtual node */
			
 
				+#define XENMEMF_vnode  (1<<18)
			
 
				+#endif
			
 
				+
			
 
				+struct xen_memory_reservation {
			
 
				+
			
 
				+    /*
			
 
				+     * XENMEM_increase_reservation:
			
 
				+     *   OUT: MFN (*not* GMFN) bases of extents that were allocated
			
 
				+     * XENMEM_decrease_reservation:
			
 
				+     *   IN:  GMFN bases of extents to free
			
 
				+     * XENMEM_populate_physmap:
			
 
				+     *   IN:  GPFN bases of extents to populate with memory
			
 
				+     *   OUT: GMFN bases of extents that were allocated
			
 
				+     *   (NB. This command also updates the mach_to_phys translation table)
			
 
				+     * XENMEM_claim_pages:
			
 
				+     *   IN: must be zero
			
 
				+     */
			
 
				+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
			
 
				+
			
 
				+    /* Number of extents, and size/alignment of each (2^extent_order pages). */
			
 
				+    xen_ulong_t    nr_extents;
			
 
				+    unsigned int   extent_order;
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
			
 
				+    /* XENMEMF flags. */
			
 
				+    unsigned int   mem_flags;
			
 
				+#else
			
 
				+    unsigned int   address_bits;
			
 
				+#endif
			
 
				+
			
 
				+    /*
			
 
				+     * Domain whose reservation is being changed.
			
 
				+     * Unprivileged domains can specify only DOMID_SELF.
			
 
				+     */
			
 
				+    domid_t        domid;
			
 
				+};
			
 
				+typedef struct xen_memory_reservation xen_memory_reservation_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
			
 
				+
			
 
				+/*
			
 
				+ * An atomic exchange of memory pages. If return code is zero then
			
 
				+ * @out.extent_list provides GMFNs of the newly-allocated memory.
			
 
				+ * Returns zero on complete success, otherwise a negative error code.
			
 
				+ * On complete success then always @nr_exchanged == @in.nr_extents.
			
 
				+ * On partial success @nr_exchanged indicates how much work was done.
			
 
				+ *
			
 
				+ * Note that only PV guests can use this operation.
			
 
				+ */
			
 
				+#define XENMEM_exchange             11
			
 
				+struct xen_memory_exchange {
			
 
				+    /*
			
 
				+     * [IN] Details of memory extents to be exchanged (GMFN bases).
			
 
				+     * Note that @in.address_bits is ignored and unused.
			
 
				+     */
			
 
				+    struct xen_memory_reservation in;
			
 
				+
			
 
				+    /*
			
 
				+     * [IN/OUT] Details of new memory extents.
			
 
				+     * We require that:
			
 
				+     *  1. @in.domid == @out.domid
			
 
				+     *  2. @in.nr_extents  << @in.extent_order ==
			
 
				+     *     @out.nr_extents << @out.extent_order
			
 
				+     *  3. @in.extent_start and @out.extent_start lists must not overlap
			
 
				+     *  4. @out.extent_start lists GPFN bases to be populated
			
 
				+     *  5. @out.extent_start is overwritten with allocated GMFN bases
			
 
				+     */
			
 
				+    struct xen_memory_reservation out;
			
 
				+
			
 
				+    /*
			
 
				+     * [OUT] Number of input extents that were successfully exchanged:
			
 
				+     *  1. The first @nr_exchanged input extents were successfully
			
 
				+     *     deallocated.
			
 
				+     *  2. The corresponding first entries in the output extent list correctly
			
 
				+     *     indicate the GMFNs that were successfully exchanged.
			
 
				+     *  3. All other input and output extents are untouched.
			
 
				+     *  4. If not all input extents are exchanged then the return code of this
			
 
				+     *     command will be non-zero.
			
 
				+     *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
			
 
				+     */
			
 
				+    xen_ulong_t nr_exchanged;
			
 
				+};
			
 
				+typedef struct xen_memory_exchange xen_memory_exchange_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
			
 
				+
			
 
				+/*
			
 
				+ * Returns the maximum machine frame number of mapped RAM in this system.
			
 
				+ * This command always succeeds (it never returns an error code).
			
 
				+ * arg == NULL.
			
 
				+ */
			
 
				+#define XENMEM_maximum_ram_page     2
			
 
				+
			
 
				+struct xen_memory_domain {
			
 
				+    /* [IN] Domain for which information is being queried. */
			
 
				+    domid_t domid;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * Returns the current or maximum memory reservation, in pages, of the
			
 
				+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
			
 
				+ * arg == addr of struct xen_memory_domain.
			
 
				+ */
			
 
				+#define XENMEM_current_reservation  3
			
 
				+#define XENMEM_maximum_reservation  4
			
 
				+
			
 
				+/*
			
 
				+ * Returns the maximum GFN in use by the specified domain (may be DOMID_SELF).
			
 
				+ * Returns -ve errcode on failure.
			
 
				+ * arg == addr of struct xen_memory_domain.
			
 
				+ */
			
 
				+#define XENMEM_maximum_gpfn         14
			
 
				+
			
 
				+/*
			
 
				+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
			
 
				+ * mapping table. Architectures which do not have a m2p table do not implement
			
 
				+ * this command.
			
 
				+ * arg == addr of xen_machphys_mfn_list_t.
			
 
				+ */
			
 
				+#define XENMEM_machphys_mfn_list    5
			
 
				+struct xen_machphys_mfn_list {
			
 
				+    /*
			
 
				+     * Size of the 'extent_start' array. Fewer entries will be filled if the
			
 
				+     * machphys table is smaller than max_extents * 2MB.
			
 
				+     */
			
 
				+    unsigned int max_extents;
			
 
				+
			
 
				+    /*
			
 
				+     * Pointer to buffer to fill with list of extent starts. If there are
			
 
				+     * any large discontiguities in the machine address space, 2MB gaps in
			
 
				+     * the machphys table will be represented by an MFN base of zero.
			
 
				+     */
			
 
				+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
			
 
				+
			
 
				+    /*
			
 
				+     * Number of extents written to the above array. This will be smaller
			
 
				+     * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
			
 
				+     */
			
 
				+    unsigned int nr_extents;
			
 
				+};
			
 
				+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
			
 
				+
			
 
				+/*
			
 
				+ * For a compat caller, this is identical to XENMEM_machphys_mfn_list.
			
 
				+ *
			
 
				+ * For a non compat caller, this functions similarly to
			
 
				+ * XENMEM_machphys_mfn_list, but returns the mfns making up the compatibility
			
 
				+ * m2p table.
			
 
				+ */
			
 
				+#define XENMEM_machphys_compat_mfn_list     25
			
 
				+
			
 
				+/*
			
 
				+ * Returns the location in virtual address space of the machine_to_phys
			
 
				+ * mapping table. Architectures which do not have a m2p table, or which do not
			
 
				+ * map it by default into guest address space, do not implement this command.
			
 
				+ * arg == addr of xen_machphys_mapping_t.
			
 
				+ */
			
 
				+#define XENMEM_machphys_mapping     12
			
 
				+struct xen_machphys_mapping {
			
 
				+    xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
			
 
				+    xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
			
 
				+};
			
 
				+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
			
 
				+
			
 
				+/* Source mapping space. */
			
 
				+/* ` enum phys_map_space { */
			
 
				+#define XENMAPSPACE_shared_info  0 /* shared info page */
			
 
				+#define XENMAPSPACE_grant_table  1 /* grant table page */
			
 
				+#define XENMAPSPACE_gmfn         2 /* GMFN */
			
 
				+#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
			
 
				+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
			
 
				+                                    * XENMEM_add_to_physmap_batch only. */
			
 
				+#define XENMAPSPACE_dev_mmio     5 /* device mmio region
			
 
				+                                      ARM only; the region is mapped in
			
 
				+                                      Stage-2 using the Normal Memory
			
 
				+                                      Inner/Outer Write-Back Cacheable
			
 
				+                                      memory attribute. */
			
 
				+/* ` } */
			
 
				+
			
 
				+/*
			
 
				+ * Sets the GPFN at which a particular page appears in the specified guest's
			
 
				+ * physical address space (translated guests only).
			
 
				+ * arg == addr of xen_add_to_physmap_t.
			
 
				+ */
			
 
				+#define XENMEM_add_to_physmap      7
			
 
				+struct xen_add_to_physmap {
			
 
				+    /* Which domain to change the mapping for. */
			
 
				+    domid_t domid;
			
 
				+
			
 
				+    /* Number of pages to go through for gmfn_range */
			
 
				+    uint16_t    size;
			
 
				+
			
 
				+    unsigned int space; /* => enum phys_map_space */
			
 
				+
			
 
				+#define XENMAPIDX_grant_table_status 0x80000000
			
 
				+
			
 
				+    /* Index into space being mapped. */
			
 
				+    xen_ulong_t idx;
			
 
				+
			
 
				+    /* GPFN in domid where the source mapping page should appear. */
			
 
				+    xen_pfn_t     gpfn;
			
 
				+};
			
 
				+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
			
 
				+
			
 
				+/* A batched version of add_to_physmap. */
			
 
				+#define XENMEM_add_to_physmap_batch 23
			
 
				+struct xen_add_to_physmap_batch {
			
 
				+    /* IN */
			
 
				+    /* Which domain to change the mapping for. */
			
 
				+    domid_t domid;
			
 
				+    uint16_t space; /* => enum phys_map_space */
			
 
				+
			
 
				+    /* Number of pages to go through */
			
 
				+    uint16_t size;
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040700
			
 
				+    domid_t foreign_domid; /* IFF gmfn_foreign. Should be 0 for other spaces. */
			
 
				+#else
			
 
				+    union xen_add_to_physmap_batch_extra {
			
 
				+        domid_t foreign_domid; /* gmfn_foreign */
			
 
				+        uint16_t res0;  /* All the other spaces. Should be 0 */
			
 
				+    } u;
			
 
				+#endif
			
 
				+
			
 
				+    /* Indexes into space being mapped. */
			
 
				+    XEN_GUEST_HANDLE(xen_ulong_t) idxs;
			
 
				+
			
 
				+    /* GPFN in domid where the source mapping page should appear. */
			
 
				+    XEN_GUEST_HANDLE(xen_pfn_t) gpfns;
			
 
				+
			
 
				+    /* OUT */
			
 
				+
			
 
				+    /* Per index error code. */
			
 
				+    XEN_GUEST_HANDLE(int) errs;
			
 
				+};
			
 
				+typedef struct xen_add_to_physmap_batch xen_add_to_physmap_batch_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_batch_t);
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040400
			
 
				+#define XENMEM_add_to_physmap_range XENMEM_add_to_physmap_batch
			
 
				+#define xen_add_to_physmap_range xen_add_to_physmap_batch
			
 
				+typedef struct xen_add_to_physmap_batch xen_add_to_physmap_range_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_range_t);
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Unmaps the page appearing at a particular GPFN from the specified guest's
			
 
				+ * physical address space (translated guests only).
			
 
				+ * arg == addr of xen_remove_from_physmap_t.
			
 
				+ */
			
 
				+#define XENMEM_remove_from_physmap      15
			
 
				+struct xen_remove_from_physmap {
			
 
				+    /* Which domain to change the mapping for. */
			
 
				+    domid_t domid;
			
 
				+
			
 
				+    /* GPFN of the current mapping of the page. */
			
 
				+    xen_pfn_t     gpfn;
			
 
				+};
			
 
				+typedef struct xen_remove_from_physmap xen_remove_from_physmap_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t);
			
 
				+
			
 
				+/*** REMOVED ***/
			
 
				+/*#define XENMEM_translate_gpfn_list  8*/
			
 
				+
			
 
				+/*
			
 
				+ * Returns the pseudo-physical memory map as it was when the domain
			
 
				+ * was started (specified by XENMEM_set_memory_map).
			
 
				+ * arg == addr of xen_memory_map_t.
			
 
				+ */
			
 
				+#define XENMEM_memory_map           9
			
 
				+struct xen_memory_map {
			
 
				+    /*
			
 
				+     * On call the number of entries which can be stored in buffer. On
			
 
				+     * return the number of entries which have been stored in
			
 
				+     * buffer.
			
 
				+     */
			
 
				+    unsigned int nr_entries;
			
 
				+
			
 
				+    /*
			
 
				+     * Entries in the buffer are in the same format as returned by the
			
 
				+     * BIOS INT 0x15 EAX=0xE820 call.
			
 
				+     */
			
 
				+    XEN_GUEST_HANDLE(void) buffer;
			
 
				+};
			
 
				+typedef struct xen_memory_map xen_memory_map_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
			
 
				+
			
 
				+/*
			
 
				+ * Returns the real physical memory map. Passes the same structure as
			
 
				+ * XENMEM_memory_map.
			
 
				+ * Specifying buffer as NULL will return the number of entries required
			
 
				+ * to store the complete memory map.
			
 
				+ * arg == addr of xen_memory_map_t.
			
 
				+ */
			
 
				+#define XENMEM_machine_memory_map   10
			
 
				+
			
 
				+/*
			
 
				+ * Set the pseudo-physical memory map of a domain, as returned by
			
 
				+ * XENMEM_memory_map.
			
 
				+ * arg == addr of xen_foreign_memory_map_t.
			
 
				+ */
			
 
				+#define XENMEM_set_memory_map       13
			
 
				+struct xen_foreign_memory_map {
			
 
				+    domid_t domid;
			
 
				+    struct xen_memory_map map;
			
 
				+};
			
 
				+typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
			
 
				+
			
 
				+#define XENMEM_set_pod_target       16
			
 
				+#define XENMEM_get_pod_target       17
			
 
				+struct xen_pod_target {
			
 
				+    /* IN */
			
 
				+    uint64_t target_pages;
			
 
				+    /* OUT */
			
 
				+    uint64_t tot_pages;
			
 
				+    uint64_t pod_cache_pages;
			
 
				+    uint64_t pod_entries;
			
 
				+    /* IN */
			
 
				+    domid_t domid;
			
 
				+};
			
 
				+typedef struct xen_pod_target xen_pod_target_t;
			
 
				+
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+
			
 
				+#ifndef uint64_aligned_t
			
 
				+#define uint64_aligned_t uint64_t
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * Get the number of MFNs saved through memory sharing.
			
 
				+ * The call never fails.
			
 
				+ */
			
 
				+#define XENMEM_get_sharing_freed_pages    18
			
 
				+#define XENMEM_get_sharing_shared_pages   19
			
 
				+
			
 
				+#define XENMEM_paging_op                    20
			
 
				+#define XENMEM_paging_op_nominate           0
			
 
				+#define XENMEM_paging_op_evict              1
			
 
				+#define XENMEM_paging_op_prep               2
			
 
				+
			
 
				+struct xen_mem_paging_op {
			
 
				+    uint8_t     op;         /* XENMEM_paging_op_* */
			
 
				+    domid_t     domain;
			
 
				+
			
 
				+    /* IN: (XENMEM_paging_op_prep) buffer to immediately fill page from */
			
 
				+    XEN_GUEST_HANDLE_64(const_uint8) buffer;
			
 
				+    /* IN:  gfn of page being operated on */
			
 
				+    uint64_aligned_t    gfn;
			
 
				+};
			
 
				+typedef struct xen_mem_paging_op xen_mem_paging_op_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_mem_paging_op_t);
			
 
				+
			
 
				+#define XENMEM_access_op                    21
			
 
				+#define XENMEM_access_op_set_access         0
			
 
				+#define XENMEM_access_op_get_access         1
			
 
				+/*
			
 
				+ * XENMEM_access_op_enable_emulate and XENMEM_access_op_disable_emulate are
			
 
				+ * currently unused, but since they have been in use please do not reuse them.
			
 
				+ *
			
 
				+ * #define XENMEM_access_op_enable_emulate     2
			
 
				+ * #define XENMEM_access_op_disable_emulate    3
			
 
				+ */
			
 
				+#define XENMEM_access_op_set_access_multi   4
			
 
				+
			
 
				+typedef enum {
			
 
				+    XENMEM_access_n,
			
 
				+    XENMEM_access_r,
			
 
				+    XENMEM_access_w,
			
 
				+    XENMEM_access_rw,
			
 
				+    XENMEM_access_x,
			
 
				+    XENMEM_access_rx,
			
 
				+    XENMEM_access_wx,
			
 
				+    XENMEM_access_rwx,
			
 
				+    /*
			
 
				+     * Page starts off as r-x, but automatically
			
 
				+     * changes to r-w on a write
			
 
				+     */
			
 
				+    XENMEM_access_rx2rw,
			
 
				+    /*
			
 
				+     * Log access: starts off as n, automatically
			
 
				+     * goes to rwx, generating an event without
			
 
				+     * pausing the vcpu
			
 
				+     */
			
 
				+    XENMEM_access_n2rwx,
			
 
				+    /* Take the domain default */
			
 
				+    XENMEM_access_default
			
 
				+} xenmem_access_t;
			
 
				+
			
 
				+struct xen_mem_access_op {
			
 
				+    /* XENMEM_access_op_* */
			
 
				+    uint8_t op;
			
 
				+    /* xenmem_access_t */
			
 
				+    uint8_t access;
			
 
				+    domid_t domid;
			
 
				+    /*
			
 
				+     * Number of pages for set op (or size of pfn_list for
			
 
				+     * XENMEM_access_op_set_access_multi)
			
 
				+     * Ignored on setting default access and other ops
			
 
				+     */
			
 
				+    uint32_t nr;
			
 
				+    /*
			
 
				+     * First pfn for set op
			
 
				+     * pfn for get op
			
 
				+     * ~0ull is used to set and get the default access for pages
			
 
				+     */
			
 
				+    uint64_aligned_t pfn;
			
 
				+    /*
			
 
				+     * List of pfns to set access for
			
 
				+     * Used only with XENMEM_access_op_set_access_multi
			
 
				+     */
			
 
				+    XEN_GUEST_HANDLE(const_uint64) pfn_list;
			
 
				+    /*
			
 
				+     * Corresponding list of access settings for pfn_list
			
 
				+     * Used only with XENMEM_access_op_set_access_multi
			
 
				+     */
			
 
				+    XEN_GUEST_HANDLE(const_uint8) access_list;
			
 
				+};
			
 
				+typedef struct xen_mem_access_op xen_mem_access_op_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
			
 
				+
			
 
				+#define XENMEM_sharing_op                   22
			
 
				+#define XENMEM_sharing_op_nominate_gfn      0
			
 
				+#define XENMEM_sharing_op_nominate_gref     1
			
 
				+#define XENMEM_sharing_op_share             2
			
 
				+#define XENMEM_sharing_op_debug_gfn         3
			
 
				+#define XENMEM_sharing_op_debug_mfn         4
			
 
				+#define XENMEM_sharing_op_debug_gref        5
			
 
				+#define XENMEM_sharing_op_add_physmap       6
			
 
				+#define XENMEM_sharing_op_audit             7
			
 
				+#define XENMEM_sharing_op_range_share       8
			
 
				+#define XENMEM_sharing_op_fork              9
			
 
				+#define XENMEM_sharing_op_fork_reset        10
			
 
				+
			
 
				+#define XENMEM_SHARING_OP_S_HANDLE_INVALID  (-10)
			
 
				+#define XENMEM_SHARING_OP_C_HANDLE_INVALID  (-9)
			
 
				+
			
 
				+/* The following allows sharing of grant refs. This is useful
			
 
				+ * for sharing utilities sitting as "filters" in IO backends
			
 
				+ * (e.g. memshr + blktap(2)). The IO backend is only exposed
			
 
				+ * to grant references, and this allows sharing of the grefs */
			
 
				+#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG   (xen_mk_ullong(1) << 62)
			
 
				+
			
 
				+#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val)  \
			
 
				+    (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val)
			
 
				+#define XENMEM_SHARING_OP_FIELD_IS_GREF(field)         \
			
 
				+    ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)
			
 
				+#define XENMEM_SHARING_OP_FIELD_GET_GREF(field)        \
			
 
				+    ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG))
			
 
				+
			
 
				+struct xen_mem_sharing_op {
			
 
				+    uint8_t     op;     /* XENMEM_sharing_op_* */
			
 
				+    domid_t     domain;
			
 
				+
			
 
				+    union {
			
 
				+        struct mem_sharing_op_nominate {  /* OP_NOMINATE_xxx           */
			
 
				+            union {
			
 
				+                uint64_aligned_t gfn;     /* IN: gfn to nominate       */
			
 
				+                uint32_t      grant_ref;  /* IN: grant ref to nominate */
			
 
				+            } u;
			
 
				+            uint64_aligned_t  handle;     /* OUT: the handle           */
			
 
				+        } nominate;
			
 
				+        struct mem_sharing_op_share {     /* OP_SHARE/ADD_PHYSMAP */
			
 
				+            uint64_aligned_t source_gfn;    /* IN: the gfn of the source page */
			
 
				+            uint64_aligned_t source_handle; /* IN: handle to the source page */
			
 
				+            uint64_aligned_t client_gfn;    /* IN: the client gfn */
			
 
				+            uint64_aligned_t client_handle; /* IN: handle to the client page */
			
 
				+            domid_t  client_domain; /* IN: the client domain id */
			
 
				+        } share;
			
 
				+        struct mem_sharing_op_range {         /* OP_RANGE_SHARE */
			
 
				+            uint64_aligned_t first_gfn;      /* IN: the first gfn */
			
 
				+            uint64_aligned_t last_gfn;       /* IN: the last gfn */
			
 
				+            uint64_aligned_t opaque;         /* Must be set to 0 */
			
 
				+            domid_t client_domain;           /* IN: the client domain id */
			
 
				+            uint16_t _pad[3];                /* Must be set to 0 */
			
 
				+        } range;
			
 
				+        struct mem_sharing_op_debug {     /* OP_DEBUG_xxx */
			
 
				+            union {
			
 
				+                uint64_aligned_t gfn;      /* IN: gfn to debug          */
			
 
				+                uint64_aligned_t mfn;      /* IN: mfn to debug          */
			
 
				+                uint32_t gref;     /* IN: gref to debug         */
			
 
				+            } u;
			
 
				+        } debug;
			
 
				+        struct mem_sharing_op_fork {      /* OP_FORK */
			
 
				+            domid_t parent_domain;        /* IN: parent's domain id */
			
 
				+/* Only makes sense for short-lived forks */
			
 
				+#define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0)
			
 
				+/* Only makes sense for short-lived forks */
			
 
				+#define XENMEM_FORK_BLOCK_INTERRUPTS   (1u << 1)
			
 
				+            uint16_t flags;               /* IN: optional settings */
			
 
				+            uint32_t pad;                 /* Must be set to 0 */
			
 
				+        } fork;
			
 
				+    } u;
			
 
				+};
			
 
				+typedef struct xen_mem_sharing_op xen_mem_sharing_op_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
			
 
				+
			
 
				+/*
			
 
				+ * Attempt to stake a claim for a domain on a quantity of pages
			
 
				+ * of system RAM, but _not_ assign specific pageframes.  Only
			
 
				+ * arithmetic is performed so the hypercall is very fast and need
			
 
				+ * not be preemptible, thus sidestepping time-of-check-time-of-use
			
 
				+ * races for memory allocation.  Returns 0 if the hypervisor page
			
 
				+ * allocator has atomically and successfully claimed the requested
			
 
				+ * number of pages, else non-zero.
			
 
				+ *
			
 
				+ * Any domain may have only one active claim.  When sufficient memory
			
 
				+ * has been allocated to resolve the claim, the claim silently expires.
			
 
				+ * Claiming zero pages effectively resets any outstanding claim and
			
 
				+ * is always successful.
			
 
				+ *
			
 
				+ * Note that a valid claim may be staked even after memory has been
			
 
				+ * allocated for a domain.  In this case, the claim is not incremental,
			
 
				+ * i.e. if the domain's total page count is 3, and a claim is staked
			
 
				+ * for 10, only 7 additional pages are claimed.
			
 
				+ *
			
 
				+ * Caller must be privileged or the hypercall fails.
			
 
				+ */
			
 
				+#define XENMEM_claim_pages                  24
			
 
				+
			
 
				+/*
			
 
				+ * XENMEM_claim_pages flags - there are no flags at this time.
			
 
				+ * The zero value is appropriate.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * With some legacy devices, certain guest-physical addresses cannot safely
			
 
				+ * be used for other purposes, e.g. to map guest RAM.  This hypercall
			
 
				+ * enumerates those regions so the toolstack can avoid using them.
			
 
				+ */
			
 
				+#define XENMEM_reserved_device_memory_map   27
			
 
				+struct xen_reserved_device_memory {
			
 
				+    xen_pfn_t start_pfn;
			
 
				+    xen_ulong_t nr_pages;
			
 
				+};
			
 
				+typedef struct xen_reserved_device_memory xen_reserved_device_memory_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t);
			
 
				+
			
 
				+struct xen_reserved_device_memory_map {
			
 
				+#define XENMEM_RDM_ALL 1 /* Request all regions (ignore dev union). */
			
 
				+    /* IN */
			
 
				+    uint32_t flags;
			
 
				+    /*
			
 
				+     * IN/OUT
			
 
				+     *
			
 
				+     * Gets set to the required number of entries when too low,
			
 
				+     * signaled by error code -ERANGE.
			
 
				+     */
			
 
				+    unsigned int nr_entries;
			
 
				+    /* OUT */
			
 
				+    XEN_GUEST_HANDLE(xen_reserved_device_memory_t) buffer;
			
 
				+    /* IN */
			
 
				+    union {
			
 
				+        physdev_pci_device_t pci;
			
 
				+    } dev;
			
 
				+};
			
 
				+typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t);
			
 
				+
			
 
				+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
			
 
				+
			
 
				+/*
			
 
				+ * Get the pages for a particular guest resource, so that they can be
			
 
				+ * mapped directly by a tools domain.
			
 
				+ */
			
 
				+#define XENMEM_acquire_resource 28
			
 
				+struct xen_mem_acquire_resource {
			
 
				+    /* IN - The domain whose resource is to be mapped */
			
 
				+    domid_t domid;
			
 
				+    /* IN - the type of resource */
			
 
				+    uint16_t type;
			
 
				+
			
 
				+#define XENMEM_resource_ioreq_server 0
			
 
				+#define XENMEM_resource_grant_table 1
			
 
				+#define XENMEM_resource_vmtrace_buf 2
			
 
				+
			
 
				+    /*
			
 
				+     * IN - a type-specific resource identifier, which must be zero
			
 
				+     *      unless stated otherwise.
			
 
				+     *
			
 
				+     * type == XENMEM_resource_ioreq_server -> id == ioreq server id
			
 
				+     * type == XENMEM_resource_grant_table -> id defined below
			
 
				+     */
			
 
				+    uint32_t id;
			
 
				+
			
 
				+#define XENMEM_resource_grant_table_id_shared 0
			
 
				+#define XENMEM_resource_grant_table_id_status 1
			
 
				+
			
 
				+    /*
			
 
				+     * IN/OUT
			
 
				+     *
			
 
				+     * As an IN parameter number of frames of the resource to be mapped.
			
 
				+     * This value may be updated over the course of the operation.
			
 
				+     *
			
 
				+     * When frame_list is NULL and nr_frames is 0, this is interpreted as a
			
 
				+     * request for the size of the resource, which shall be returned in the
			
 
				+     * nr_frames field.
			
 
				+     *
			
 
				+     * The size of a resource will never be zero, but a nonzero result doesn't
			
 
				+     * guarantee that a subsequent mapping request will be successful.  There
			
 
				+     * are further type/id specific constraints which may change between the
			
 
				+     * two calls.
			
 
				+     */
			
 
				+    uint32_t nr_frames;
			
 
				+    uint32_t pad;
			
 
				+    /*
			
 
				+     * IN - the index of the initial frame to be mapped. This parameter
			
 
				+     *      is ignored if nr_frames is 0.  This value may be updated
			
 
				+     *      over the course of the operation.
			
 
				+     */
			
 
				+    uint64_t frame;
			
 
				+
			
 
				+#define XENMEM_resource_ioreq_server_frame_bufioreq 0
			
 
				+#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
			
 
				+
			
 
				+    /*
			
 
				+     * IN/OUT - If the tools domain is PV then, upon return, frame_list
			
 
				+     *          will be populated with the MFNs of the resource.
			
 
				+     *          If the tools domain is HVM then it is expected that, on
			
 
				+     *          entry, frame_list will be populated with a list of GFNs
			
 
				+     *          that will be mapped to the MFNs of the resource.
			
 
				+     *          If -EIO is returned then the frame_list has only been
			
 
				+     *          partially mapped and it is up to the caller to unmap all
			
 
				+     *          the GFNs.
			
 
				+     *          This parameter may be NULL if nr_frames is 0.  This
			
 
				+     *          value may be updated over the course of the operation.
			
 
				+     */
			
 
				+    XEN_GUEST_HANDLE(xen_pfn_t) frame_list;
			
 
				+};
			
 
				+typedef struct xen_mem_acquire_resource xen_mem_acquire_resource_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_mem_acquire_resource_t);
			
 
				+
			
 
				+/*
			
 
				+ * XENMEM_get_vnumainfo used by guest to get
			
 
				+ * vNUMA topology from hypervisor.
			
 
				+ */
			
 
				+#define XENMEM_get_vnumainfo                26
			
 
				+
			
 
				+/* vNUMA node memory ranges */
			
 
				+struct xen_vmemrange {
			
 
				+    uint64_t start, end;
			
 
				+    unsigned int flags;
			
 
				+    unsigned int nid;
			
 
				+};
			
 
				+typedef struct xen_vmemrange xen_vmemrange_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_vmemrange_t);
			
 
				+
			
 
				+/*
			
 
				+ * vNUMA topology specifies vNUMA node number, distance table,
			
 
				+ * memory ranges and vcpu mapping provided for guests.
			
 
				+ * XENMEM_get_vnumainfo hypercall expects to see from guest
			
 
				+ * nr_vnodes, nr_vmemranges and nr_vcpus to indicate available memory.
			
 
				+ * After filling guests structures, nr_vnodes, nr_vmemranges and nr_vcpus
			
 
				+ * are copied back to guest. Domain returns expected values of nr_vnodes,
			
 
				+ * nr_vmemranges and nr_vcpus to guest if the values were incorrect.
			
 
				+ */
			
 
				+struct xen_vnuma_topology_info {
			
 
				+    /* IN */
			
 
				+    domid_t domid;
			
 
				+    uint16_t pad;
			
 
				+    /* IN/OUT */
			
 
				+    unsigned int nr_vnodes;
			
 
				+    unsigned int nr_vcpus;
			
 
				+    unsigned int nr_vmemranges;
			
 
				+    /* OUT */
			
 
				+    union {
			
 
				+        XEN_GUEST_HANDLE(uint) h;
			
 
				+        uint64_t pad;
			
 
				+    } vdistance;
			
 
				+    union {
			
 
				+        XEN_GUEST_HANDLE(uint) h;
			
 
				+        uint64_t pad;
			
 
				+    } vcpu_to_vnode;
			
 
				+    union {
			
 
				+        XEN_GUEST_HANDLE(xen_vmemrange_t) h;
			
 
				+        uint64_t pad;
			
 
				+    } vmemrange;
			
 
				+};
			
 
				+typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
			
 
				+
			
 
				+/* Next available subop number is 29 */
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_MEMORY_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/physdev.h
+++ b/include/hw/xen/interface/physdev.h
@@ -0,0 +1,383 @@
 
				+/*
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2006, Keir Fraser
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_PHYSDEV_H__
			
 
				+#define __XEN_PUBLIC_PHYSDEV_H__
			
 
				+
			
 
				+#include "xen.h"
			
 
				+
			
 
				+/*
			
 
				+ * Prototype for this hypercall is:
			
 
				+ *  int physdev_op(int cmd, void *args)
			
 
				+ * @cmd  == PHYSDEVOP_??? (physdev operation).
			
 
				+ * @args == Operation-specific extra arguments (NULL if none).
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Notify end-of-interrupt (EOI) for the specified IRQ.
			
 
				+ * @arg == pointer to physdev_eoi structure.
			
 
				+ */
			
 
				+#define PHYSDEVOP_eoi                   12
			
 
				+struct physdev_eoi {
			
 
				+    /* IN */
			
 
				+    uint32_t irq;
			
 
				+};
			
 
				+typedef struct physdev_eoi physdev_eoi_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
			
 
				+
			
 
				+/*
			
 
				+ * Register a shared page for the hypervisor to indicate whether the guest
			
 
				+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
			
 
				+ * once the guest has used this function in that the associated event channel
			
 
				+ * will automatically get unmasked. The page registered is used as a bit
			
 
				+ * array indexed by Xen's PIRQ value.
			
 
				+ */
			
 
				+#define PHYSDEVOP_pirq_eoi_gmfn_v1       17
			
 
				+/*
			
 
				+ * Register a shared page for the hypervisor to indicate whether the
			
 
				+ * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to
			
 
				+ * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of
			
 
				+ * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by
			
 
				+ * Xen's PIRQ value.
			
 
				+ */
			
 
				+#define PHYSDEVOP_pirq_eoi_gmfn_v2       28
			
 
				+struct physdev_pirq_eoi_gmfn {
			
 
				+    /* IN */
			
 
				+    xen_pfn_t gmfn;
			
 
				+};
			
 
				+typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t);
			
 
				+
			
 
				+/*
			
 
				+ * Query the status of an IRQ line.
			
 
				+ * @arg == pointer to physdev_irq_status_query structure.
			
 
				+ */
			
 
				+#define PHYSDEVOP_irq_status_query       5
			
 
				+struct physdev_irq_status_query {
			
 
				+    /* IN */
			
 
				+    uint32_t irq;
			
 
				+    /* OUT */
			
 
				+    uint32_t flags; /* XENIRQSTAT_* */
			
 
				+};
			
 
				+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);
			
 
				+
			
 
				+/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
			
 
				+#define _XENIRQSTAT_needs_eoi   (0)
			
 
				+#define  XENIRQSTAT_needs_eoi   (1U<<_XENIRQSTAT_needs_eoi)
			
 
				+
			
 
				+/* IRQ shared by multiple guests? */
			
 
				+#define _XENIRQSTAT_shared      (1)
			
 
				+#define  XENIRQSTAT_shared      (1U<<_XENIRQSTAT_shared)
			
 
				+
			
 
				+/*
			
 
				+ * Set the current VCPU's I/O privilege level.
			
 
				+ * @arg == pointer to physdev_set_iopl structure.
			
 
				+ */
			
 
				+#define PHYSDEVOP_set_iopl               6
			
 
				+struct physdev_set_iopl {
			
 
				+    /* IN */
			
 
				+    uint32_t iopl;
			
 
				+};
			
 
				+typedef struct physdev_set_iopl physdev_set_iopl_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);
			
 
				+
			
 
				+/*
			
 
				+ * Set the current VCPU's I/O-port permissions bitmap.
			
 
				+ * @arg == pointer to physdev_set_iobitmap structure.
			
 
				+ */
			
 
				+#define PHYSDEVOP_set_iobitmap           7
			
 
				+struct physdev_set_iobitmap {
			
 
				+    /* IN */
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
			
 
				+    XEN_GUEST_HANDLE(uint8) bitmap;
			
 
				+#else
			
 
				+    uint8_t *bitmap;
			
 
				+#endif
			
 
				+    uint32_t nr_ports;
			
 
				+};
			
 
				+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);
			
 
				+
			
 
				+/*
			
 
				+ * Read or write an IO-APIC register.
			
 
				+ * @arg == pointer to physdev_apic structure.
			
 
				+ */
			
 
				+#define PHYSDEVOP_apic_read              8
			
 
				+#define PHYSDEVOP_apic_write             9
			
 
				+struct physdev_apic {
			
 
				+    /* IN */
			
 
				+    unsigned long apic_physbase;
			
 
				+    uint32_t reg;
			
 
				+    /* IN or OUT */
			
 
				+    uint32_t value;
			
 
				+};
			
 
				+typedef struct physdev_apic physdev_apic_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
			
 
				+
			
 
				+/*
			
 
				+ * Allocate or free a physical upcall vector for the specified IRQ line.
			
 
				+ * @arg == pointer to physdev_irq structure.
			
 
				+ */
			
 
				+#define PHYSDEVOP_alloc_irq_vector      10
			
 
				+#define PHYSDEVOP_free_irq_vector       11
			
 
				+struct physdev_irq {
			
 
				+    /* IN */
			
 
				+    uint32_t irq;
			
 
				+    /* IN or OUT */
			
 
				+    uint32_t vector;
			
 
				+};
			
 
				+typedef struct physdev_irq physdev_irq_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
			
 
				+
			
 
				+#define MAP_PIRQ_TYPE_MSI               0x0
			
 
				+#define MAP_PIRQ_TYPE_GSI               0x1
			
 
				+#define MAP_PIRQ_TYPE_UNKNOWN           0x2
			
 
				+#define MAP_PIRQ_TYPE_MSI_SEG           0x3
			
 
				+#define MAP_PIRQ_TYPE_MULTI_MSI         0x4
			
 
				+
			
 
				+#define PHYSDEVOP_map_pirq               13
			
 
				+struct physdev_map_pirq {
			
 
				+    domid_t domid;
			
 
				+    /* IN */
			
 
				+    int type;
			
 
				+    /* IN (ignored for ..._MULTI_MSI) */
			
 
				+    int index;
			
 
				+    /* IN or OUT */
			
 
				+    int pirq;
			
 
				+    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
			
 
				+    int bus;
			
 
				+    /* IN */
			
 
				+    int devfn;
			
 
				+    /* IN (also OUT for ..._MULTI_MSI) */
			
 
				+    int entry_nr;
			
 
				+    /* IN */
			
 
				+    uint64_t table_base;
			
 
				+};
			
 
				+typedef struct physdev_map_pirq physdev_map_pirq_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);
			
 
				+
			
 
				+#define PHYSDEVOP_unmap_pirq             14
			
 
				+struct physdev_unmap_pirq {
			
 
				+    domid_t domid;
			
 
				+    /* IN */
			
 
				+    int pirq;
			
 
				+};
			
 
				+
			
 
				+typedef struct physdev_unmap_pirq physdev_unmap_pirq_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t);
			
 
				+
			
 
				+#define PHYSDEVOP_manage_pci_add         15
			
 
				+#define PHYSDEVOP_manage_pci_remove      16
			
 
				+struct physdev_manage_pci {
			
 
				+    /* IN */
			
 
				+    uint8_t bus;
			
 
				+    uint8_t devfn;
			
 
				+};
			
 
				+
			
 
				+typedef struct physdev_manage_pci physdev_manage_pci_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t);
			
 
				+
			
 
				+#define PHYSDEVOP_restore_msi            19
			
 
				+struct physdev_restore_msi {
			
 
				+    /* IN */
			
 
				+    uint8_t bus;
			
 
				+    uint8_t devfn;
			
 
				+};
			
 
				+typedef struct physdev_restore_msi physdev_restore_msi_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t);
			
 
				+
			
 
				+#define PHYSDEVOP_manage_pci_add_ext     20
			
 
				+struct physdev_manage_pci_ext {
			
 
				+    /* IN */
			
 
				+    uint8_t bus;
			
 
				+    uint8_t devfn;
			
 
				+    unsigned is_extfn;
			
 
				+    unsigned is_virtfn;
			
 
				+    struct {
			
 
				+        uint8_t bus;
			
 
				+        uint8_t devfn;
			
 
				+    } physfn;
			
 
				+};
			
 
				+
			
 
				+typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t);
			
 
				+
			
 
				+/*
			
 
				+ * Argument to physdev_op_compat() hypercall. Superseded by new physdev_op()
			
 
				+ * hypercall since 0x00030202.
			
 
				+ */
			
 
				+struct physdev_op {
			
 
				+    uint32_t cmd;
			
 
				+    union {
			
 
				+        physdev_irq_status_query_t irq_status_query;
			
 
				+        physdev_set_iopl_t         set_iopl;
			
 
				+        physdev_set_iobitmap_t     set_iobitmap;
			
 
				+        physdev_apic_t             apic_op;
			
 
				+        physdev_irq_t              irq_op;
			
 
				+    } u;
			
 
				+};
			
 
				+typedef struct physdev_op physdev_op_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
			
 
				+
			
 
				+#define PHYSDEVOP_setup_gsi    21
			
 
				+struct physdev_setup_gsi {
			
 
				+    int gsi;
			
 
				+    /* IN */
			
 
				+    uint8_t triggering;
			
 
				+    /* IN */
			
 
				+    uint8_t polarity;
			
 
				+    /* IN */
			
 
				+};
			
 
				+
			
 
				+typedef struct physdev_setup_gsi physdev_setup_gsi_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t);
			
 
				+
			
 
				+/* leave PHYSDEVOP 22 free */
			
 
				+
			
 
				+/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI
			
 
				+ * the hypercall returns a free pirq */
			
 
				+#define PHYSDEVOP_get_free_pirq    23
			
 
				+struct physdev_get_free_pirq {
			
 
				+    /* IN */
			
 
				+    int type;
			
 
				+    /* OUT */
			
 
				+    uint32_t pirq;
			
 
				+};
			
 
				+
			
 
				+typedef struct physdev_get_free_pirq physdev_get_free_pirq_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t);
			
 
				+
			
 
				+#define XEN_PCI_MMCFG_RESERVED         0x1
			
 
				+
			
 
				+#define PHYSDEVOP_pci_mmcfg_reserved    24
			
 
				+struct physdev_pci_mmcfg_reserved {
			
 
				+    uint64_t address;
			
 
				+    uint16_t segment;
			
 
				+    uint8_t start_bus;
			
 
				+    uint8_t end_bus;
			
 
				+    uint32_t flags;
			
 
				+};
			
 
				+typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t);
			
 
				+
			
 
				+#define XEN_PCI_DEV_EXTFN              0x1
			
 
				+#define XEN_PCI_DEV_VIRTFN             0x2
			
 
				+#define XEN_PCI_DEV_PXM                0x4
			
 
				+
			
 
				+#define PHYSDEVOP_pci_device_add        25
			
 
				+struct physdev_pci_device_add {
			
 
				+    /* IN */
			
 
				+    uint16_t seg;
			
 
				+    uint8_t bus;
			
 
				+    uint8_t devfn;
			
 
				+    uint32_t flags;
			
 
				+    struct {
			
 
				+        uint8_t bus;
			
 
				+        uint8_t devfn;
			
 
				+    } physfn;
			
 
				+    /*
			
 
				+     * Optional parameters array.
			
 
				+     * First element ([0]) is PXM domain associated with the device (if
			
 
				+     * XEN_PCI_DEV_PXM is set)
			
 
				+     */
			
 
				+    uint32_t optarr[XEN_FLEX_ARRAY_DIM];
			
 
				+};
			
 
				+typedef struct physdev_pci_device_add physdev_pci_device_add_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t);
			
 
				+
			
 
				+#define PHYSDEVOP_pci_device_remove     26
			
 
				+#define PHYSDEVOP_restore_msi_ext       27
			
 
				+/*
			
 
				+ * Dom0 should use these two to announce that MMIO resources assigned to
			
 
				+ * MSI-X capable devices won't (prepare) or may (release) change.
			
 
				+ */
			
 
				+#define PHYSDEVOP_prepare_msix          30
			
 
				+#define PHYSDEVOP_release_msix          31
			
 
				+struct physdev_pci_device {
			
 
				+    /* IN */
			
 
				+    uint16_t seg;
			
 
				+    uint8_t bus;
			
 
				+    uint8_t devfn;
			
 
				+};
			
 
				+typedef struct physdev_pci_device physdev_pci_device_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t);
			
 
				+
			
 
				+#define PHYSDEVOP_DBGP_RESET_PREPARE    1
			
 
				+#define PHYSDEVOP_DBGP_RESET_DONE       2
			
 
				+
			
 
				+#define PHYSDEVOP_DBGP_BUS_UNKNOWN      0
			
 
				+#define PHYSDEVOP_DBGP_BUS_PCI          1
			
 
				+
			
 
				+#define PHYSDEVOP_dbgp_op               29
			
 
				+struct physdev_dbgp_op {
			
 
				+    /* IN */
			
 
				+    uint8_t op;
			
 
				+    uint8_t bus;
			
 
				+    union {
			
 
				+        physdev_pci_device_t pci;
			
 
				+    } u;
			
 
				+};
			
 
				+typedef struct physdev_dbgp_op physdev_dbgp_op_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t);
			
 
				+
			
 
				+/*
			
 
				+ * Notify that some PIRQ-bound event channels have been unmasked.
			
 
				+ * ** This command is obsolete since interface version 0x00030202 and is **
			
 
				+ * ** unsupported by newer versions of Xen.                              **
			
 
				+ */
			
 
				+#define PHYSDEVOP_IRQ_UNMASK_NOTIFY      4
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040600
			
 
				+/*
			
 
				+ * These all-capitals physdev operation names are superseded by the new names
			
 
				+ * (defined above) since interface version 0x00030202. The guard above was
			
 
				+ * added post-4.5 only though and hence shouldn't check for 0x00030202.
			
 
				+ */
			
 
				+#define PHYSDEVOP_IRQ_STATUS_QUERY       PHYSDEVOP_irq_status_query
			
 
				+#define PHYSDEVOP_SET_IOPL               PHYSDEVOP_set_iopl
			
 
				+#define PHYSDEVOP_SET_IOBITMAP           PHYSDEVOP_set_iobitmap
			
 
				+#define PHYSDEVOP_APIC_READ              PHYSDEVOP_apic_read
			
 
				+#define PHYSDEVOP_APIC_WRITE             PHYSDEVOP_apic_write
			
 
				+#define PHYSDEVOP_ASSIGN_VECTOR          PHYSDEVOP_alloc_irq_vector
			
 
				+#define PHYSDEVOP_FREE_VECTOR            PHYSDEVOP_free_irq_vector
			
 
				+#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi
			
 
				+#define PHYSDEVOP_IRQ_SHARED             XENIRQSTAT_shared
			
 
				+#endif
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040200
			
 
				+#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1
			
 
				+#else
			
 
				+#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2
			
 
				+#endif
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_PHYSDEV_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/sched.h
+++ b/include/hw/xen/interface/sched.h
@@ -0,0 +1,202 @@
 
				+/******************************************************************************
			
 
				+ * sched.h
			
 
				+ *
			
 
				+ * Scheduler state interactions
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_SCHED_H__
			
 
				+#define __XEN_PUBLIC_SCHED_H__
			
 
				+
			
 
				+#include "event_channel.h"
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 150 sched Guest Scheduler Operations
			
 
				+ *
			
 
				+ * The SCHEDOP interface provides mechanisms for a guest to interact
			
 
				+ * with the scheduler, including yield, blocking and shutting itself
			
 
				+ * down.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * The prototype for this hypercall is:
			
 
				+ * ` long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
			
 
				+ *
			
 
				+ * @cmd == SCHEDOP_??? (scheduler operation).
			
 
				+ * @arg == Operation-specific extra argument(s), as described below.
			
 
				+ * ...  == Additional Operation-specific extra arguments, described below.
			
 
				+ *
			
 
				+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
			
 
				+ * of this hypercall, supporting only the commands yield, block and shutdown:
			
 
				+ *  long sched_op(int cmd, unsigned long arg)
			
 
				+ * @cmd == SCHEDOP_??? (scheduler operation).
			
 
				+ * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
			
 
				+ *      == SHUTDOWN_* code (SCHEDOP_shutdown)
			
 
				+ *
			
 
				+ * This legacy version is available to new guests as:
			
 
				+ * ` long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
			
 
				+ */
			
 
				+
			
 
				+/* ` enum sched_op { // SCHEDOP_* => struct sched_* */
			
 
				+/*
			
 
				+ * Voluntarily yield the CPU.
			
 
				+ * @arg == NULL.
			
 
				+ */
			
 
				+#define SCHEDOP_yield       0
			
 
				+
			
 
				+/*
			
 
				+ * Block execution of this VCPU until an event is received for processing.
			
 
				+ * If called with event upcalls masked, this operation will atomically
			
 
				+ * reenable event delivery and check for pending events before blocking the
			
 
				+ * VCPU. This avoids a "wakeup waiting" race.
			
 
				+ * @arg == NULL.
			
 
				+ */
			
 
				+#define SCHEDOP_block       1
			
 
				+
			
 
				+/*
			
 
				+ * Halt execution of this domain (all VCPUs) and notify the system controller.
			
 
				+ * @arg == pointer to sched_shutdown_t structure.
			
 
				+ *
			
 
				+ * If the sched_shutdown_t reason is SHUTDOWN_suspend then
			
 
				+ * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
			
 
				+ * of the guest's start info page.  RDX/EDX is the third hypercall
			
 
				+ * argument.
			
 
				+ *
			
 
				+ * In addition, when the reason is SHUTDOWN_suspend this hypercall
			
 
				+ * returns 1 if suspend was cancelled or the domain was merely
			
 
				+ * checkpointed, and 0 if it is resuming in a new domain.
			
 
				+ */
			
 
				+#define SCHEDOP_shutdown    2
			
 
				+
			
 
				+/*
			
 
				+ * Poll a set of event-channel ports. Return when one or more are pending. An
			
 
				+ * optional timeout may be specified.
			
 
				+ * @arg == pointer to sched_poll_t structure.
			
 
				+ */
			
 
				+#define SCHEDOP_poll        3
			
 
				+
			
 
				+/*
			
 
				+ * Declare a shutdown for another domain. The main use of this function is
			
 
				+ * in interpreting shutdown requests and reasons for fully-virtualized
			
 
				+ * domains.  A para-virtualized domain may use SCHEDOP_shutdown directly.
			
 
				+ * @arg == pointer to sched_remote_shutdown_t structure.
			
 
				+ */
			
 
				+#define SCHEDOP_remote_shutdown        4
			
 
				+
			
 
				+/*
			
 
				+ * Latch a shutdown code, so that when the domain later shuts down it
			
 
				+ * reports this code to the control tools.
			
 
				+ * @arg == sched_shutdown_t, as for SCHEDOP_shutdown.
			
 
				+ */
			
 
				+#define SCHEDOP_shutdown_code 5
			
 
				+
			
 
				+/*
			
 
				+ * Setup, poke and destroy a domain watchdog timer.
			
 
				+ * @arg == pointer to sched_watchdog_t structure.
			
 
				+ * With id == 0, setup a domain watchdog timer to cause domain shutdown
			
 
				+ *               after timeout, returns watchdog id.
			
 
				+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
			
 
				+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
			
 
				+ */
			
 
				+#define SCHEDOP_watchdog    6
			
 
				+
			
 
				+/*
			
 
				+ * Override the current vcpu affinity by pinning it to one physical cpu or
			
 
				+ * undo this override restoring the previous affinity.
			
 
				+ * @arg == pointer to sched_pin_override_t structure.
			
 
				+ *
			
 
				+ * A negative pcpu value will undo a previous pin override and restore the
			
 
				+ * previous cpu affinity.
			
 
				+ * This call is allowed for the hardware domain only and requires the cpu
			
 
				+ * to be part of the domain's cpupool.
			
 
				+ */
			
 
				+#define SCHEDOP_pin_override 7
			
 
				+/* ` } */
			
 
				+
			
 
				+struct sched_shutdown {
			
 
				+    unsigned int reason; /* SHUTDOWN_* => enum sched_shutdown_reason */
			
 
				+};
			
 
				+typedef struct sched_shutdown sched_shutdown_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
			
 
				+
			
 
				+struct sched_poll {
			
 
				+    XEN_GUEST_HANDLE(evtchn_port_t) ports;
			
 
				+    unsigned int nr_ports;
			
 
				+    uint64_t timeout;
			
 
				+};
			
 
				+typedef struct sched_poll sched_poll_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
			
 
				+
			
 
				+struct sched_remote_shutdown {
			
 
				+    domid_t domain_id;         /* Remote domain ID */
			
 
				+    unsigned int reason;       /* SHUTDOWN_* => enum sched_shutdown_reason */
			
 
				+};
			
 
				+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
			
 
				+
			
 
				+struct sched_watchdog {
			
 
				+    uint32_t id;                /* watchdog ID */
			
 
				+    uint32_t timeout;           /* timeout */
			
 
				+};
			
 
				+typedef struct sched_watchdog sched_watchdog_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t);
			
 
				+
			
 
				+struct sched_pin_override {
			
 
				+    int32_t pcpu;
			
 
				+};
			
 
				+typedef struct sched_pin_override sched_pin_override_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(sched_pin_override_t);
			
 
				+
			
 
				+/*
			
 
				+ * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
			
 
				+ * software to determine the appropriate action. For the most part, Xen does
			
 
				+ * not care about the shutdown code.
			
 
				+ */
			
 
				+/* ` enum sched_shutdown_reason { */
			
 
				+#define SHUTDOWN_poweroff   0  /* Domain exited normally. Clean up and kill. */
			
 
				+#define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
			
 
				+#define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
			
 
				+#define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
			
 
				+#define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
			
 
				+
			
 
				+/*
			
 
				+ * Domain asked to perform 'soft reset' for it. The expected behavior is to
			
 
				+ * reset internal Xen state for the domain returning it to the point where it
			
 
				+ * was created but leaving the domain's memory contents and vCPU contexts
			
 
				+ * intact. This will allow the domain to start over and set up all Xen specific
			
 
				+ * interfaces again.
			
 
				+ */
			
 
				+#define SHUTDOWN_soft_reset 5
			
 
				+#define SHUTDOWN_MAX        5  /* Maximum valid shutdown reason.             */
			
 
				+/* ` } */
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_SCHED_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/trace.h
+++ b/include/hw/xen/interface/trace.h
@@ -0,0 +1,341 @@
 
				+/******************************************************************************
			
 
				+ * include/public/trace.h
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Mark Williamson, (C) 2004 Intel Research Cambridge
			
 
				+ * Copyright (C) 2005 Bin Ren
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_TRACE_H__
			
 
				+#define __XEN_PUBLIC_TRACE_H__
			
 
				+
			
 
				+#define TRACE_EXTRA_MAX    7
			
 
				+#define TRACE_EXTRA_SHIFT 28
			
 
				+
			
 
				+/* Trace classes */
			
 
				+#define TRC_CLS_SHIFT 16
			
 
				+#define TRC_GEN      0x0001f000    /* General trace            */
			
 
				+#define TRC_SCHED    0x0002f000    /* Xen Scheduler trace      */
			
 
				+#define TRC_DOM0OP   0x0004f000    /* Xen DOM0 operation trace */
			
 
				+#define TRC_HVM      0x0008f000    /* Xen HVM trace            */
			
 
				+#define TRC_MEM      0x0010f000    /* Xen memory trace         */
			
 
				+#define TRC_PV       0x0020f000    /* Xen PV traces            */
			
 
				+#define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
			
 
				+#define TRC_HW       0x0080f000    /* Xen hardware-related traces */
			
 
				+#define TRC_GUEST    0x0800f000    /* Guest-generated traces   */
			
 
				+#define TRC_ALL      0x0ffff000
			
 
				+#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
			
 
				+#define TRC_HD_CYCLE_FLAG (1UL<<31)
			
 
				+#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) )
			
 
				+#define TRC_HD_EXTRA(x)    (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX)
			
 
				+
			
 
				+/* Trace subclasses */
			
 
				+#define TRC_SUBCLS_SHIFT 12
			
 
				+
			
 
				+/* trace subclasses for SVM */
			
 
				+#define TRC_HVM_ENTRYEXIT   0x00081000   /* VMENTRY and #VMEXIT       */
			
 
				+#define TRC_HVM_HANDLER     0x00082000   /* various HVM handlers      */
			
 
				+#define TRC_HVM_EMUL        0x00084000   /* emulated devices */
			
 
				+
			
 
				+#define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
			
 
				+#define TRC_SCHED_CLASS     0x00022000   /* Scheduler-specific    */
			
 
				+#define TRC_SCHED_VERBOSE   0x00028000   /* More inclusive scheduling */
			
 
				+
			
 
				+/*
			
 
				+ * The highest 3 bits of the last 12 bits of TRC_SCHED_CLASS above are
			
 
				+ * reserved for encoding what scheduler produced the information. The
			
 
				+ * actual event is encoded in the last 9 bits.
			
 
				+ *
			
 
				+ * This means we have 8 scheduling IDs available (which means at most 8
			
 
				+ * schedulers generating events) and, in each scheduler, up to 512
			
 
				+ * different events.
			
 
				+ */
			
 
				+#define TRC_SCHED_ID_BITS 3
			
 
				+#define TRC_SCHED_ID_SHIFT (TRC_SUBCLS_SHIFT - TRC_SCHED_ID_BITS)
			
 
				+#define TRC_SCHED_ID_MASK (((1UL<<TRC_SCHED_ID_BITS) - 1) << TRC_SCHED_ID_SHIFT)
			
 
				+#define TRC_SCHED_EVT_MASK (~(TRC_SCHED_ID_MASK))
			
 
				+
			
 
				+/* Per-scheduler IDs, to identify scheduler specific events */
			
 
				+#define TRC_SCHED_CSCHED   0
			
 
				+#define TRC_SCHED_CSCHED2  1
			
 
				+/* #define XEN_SCHEDULER_SEDF 2 (Removed) */
			
 
				+#define TRC_SCHED_ARINC653 3
			
 
				+#define TRC_SCHED_RTDS     4
			
 
				+#define TRC_SCHED_SNULL    5
			
 
				+
			
 
				+/* Per-scheduler tracing: event _e of the scheduler with ID TRC_SCHED_<_c> */
			
 
				+#define TRC_SCHED_CLASS_EVT(_c, _e) \
			
 
				+  ( ( TRC_SCHED_CLASS | \
			
 
				+      ((TRC_SCHED_##_c << TRC_SCHED_ID_SHIFT) & TRC_SCHED_ID_MASK) ) + \
			
 
				+    ((_e) & TRC_SCHED_EVT_MASK) )
			
 
				+
			
 
				+/* Trace classes for DOM0 operations */
			
 
				+#define TRC_DOM0_DOMOPS     0x00041000   /* Domains manipulations */
			
 
				+
			
 
				+/* Trace classes for Hardware */
			
 
				+#define TRC_HW_PM           0x00801000   /* Power management traces */
			
 
				+#define TRC_HW_IRQ          0x00802000   /* Traces relating to the handling of IRQs */
			
 
				+
			
 
				+/* Trace events per class */
			
 
				+#define TRC_LOST_RECORDS        (TRC_GEN + 1)
			
 
				+#define TRC_TRACE_WRAP_BUFFER  (TRC_GEN + 2)
			
 
				+#define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
			
 
				+
			
 
				+#define TRC_SCHED_RUNSTATE_CHANGE   (TRC_SCHED_MIN + 1)
			
 
				+#define TRC_SCHED_CONTINUE_RUNNING  (TRC_SCHED_MIN + 2)
			
 
				+#define TRC_SCHED_DOM_ADD        (TRC_SCHED_VERBOSE +  1)
			
 
				+#define TRC_SCHED_DOM_REM        (TRC_SCHED_VERBOSE +  2)
			
 
				+#define TRC_SCHED_SLEEP          (TRC_SCHED_VERBOSE +  3)
			
 
				+#define TRC_SCHED_WAKE           (TRC_SCHED_VERBOSE +  4)
			
 
				+#define TRC_SCHED_YIELD          (TRC_SCHED_VERBOSE +  5)
			
 
				+#define TRC_SCHED_BLOCK          (TRC_SCHED_VERBOSE +  6)
			
 
				+#define TRC_SCHED_SHUTDOWN       (TRC_SCHED_VERBOSE +  7)
			
 
				+#define TRC_SCHED_CTL            (TRC_SCHED_VERBOSE +  8)
			
 
				+#define TRC_SCHED_ADJDOM         (TRC_SCHED_VERBOSE +  9)
			
 
				+#define TRC_SCHED_SWITCH         (TRC_SCHED_VERBOSE + 10)
			
 
				+#define TRC_SCHED_S_TIMER_FN     (TRC_SCHED_VERBOSE + 11)
			
 
				+#define TRC_SCHED_T_TIMER_FN     (TRC_SCHED_VERBOSE + 12)
			
 
				+#define TRC_SCHED_DOM_TIMER_FN   (TRC_SCHED_VERBOSE + 13)
			
 
				+#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
			
 
				+#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
			
 
				+#define TRC_SCHED_SHUTDOWN_CODE  (TRC_SCHED_VERBOSE + 16)
			
 
				+#define TRC_SCHED_SWITCH_INFCONT (TRC_SCHED_VERBOSE + 17)
			
 
				+
			
 
				+#define TRC_DOM0_DOM_ADD         (TRC_DOM0_DOMOPS + 1)
			
 
				+#define TRC_DOM0_DOM_REM         (TRC_DOM0_DOMOPS + 2)
			
 
				+
			
 
				+#define TRC_MEM_PAGE_GRANT_MAP      (TRC_MEM + 1)
			
 
				+#define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
			
 
				+#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3)
			
 
				+#define TRC_MEM_SET_P2M_ENTRY       (TRC_MEM + 4)
			
 
				+#define TRC_MEM_DECREASE_RESERVATION (TRC_MEM + 5)
			
 
				+#define TRC_MEM_POD_POPULATE        (TRC_MEM + 16)
			
 
				+#define TRC_MEM_POD_ZERO_RECLAIM    (TRC_MEM + 17)
			
 
				+#define TRC_MEM_POD_SUPERPAGE_SPLINTER (TRC_MEM + 18)
			
 
				+
			
 
				+#define TRC_PV_ENTRY   0x00201000 /* Hypervisor entry points for PV guests. */
			
 
				+#define TRC_PV_SUBCALL 0x00202000 /* Sub-call in a multicall hypercall */
			
 
				+
			
 
				+#define TRC_PV_HYPERCALL             (TRC_PV_ENTRY +  1)
			
 
				+#define TRC_PV_TRAP                  (TRC_PV_ENTRY +  3)
			
 
				+#define TRC_PV_PAGE_FAULT            (TRC_PV_ENTRY +  4)
			
 
				+#define TRC_PV_FORCED_INVALID_OP     (TRC_PV_ENTRY +  5)
			
 
				+#define TRC_PV_EMULATE_PRIVOP        (TRC_PV_ENTRY +  6)
			
 
				+#define TRC_PV_EMULATE_4GB           (TRC_PV_ENTRY +  7)
			
 
				+#define TRC_PV_MATH_STATE_RESTORE    (TRC_PV_ENTRY +  8)
			
 
				+#define TRC_PV_PAGING_FIXUP          (TRC_PV_ENTRY +  9)
			
 
				+#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV_ENTRY + 10)
			
 
				+#define TRC_PV_PTWR_EMULATION        (TRC_PV_ENTRY + 11)
			
 
				+#define TRC_PV_PTWR_EMULATION_PAE    (TRC_PV_ENTRY + 12)
			
 
				+#define TRC_PV_HYPERCALL_V2          (TRC_PV_ENTRY + 13)
			
 
				+#define TRC_PV_HYPERCALL_SUBCALL     (TRC_PV_SUBCALL + 14)
			
 
				+
			
 
				+/*
			
 
				+ * TRC_PV_HYPERCALL_V2 format
			
 
				+ *
			
 
				+ * Only some of the hypercall arguments are recorded. Bit fields A0 to
			
 
				+ * A5 in the first extra word are set if the argument is present and
			
 
				+ * the arguments themselves are packed sequentially in the following
			
 
				+ * words.
			
 
				+ *
			
 
				+ * The TRC_64_FLAG bit is not set for these events (even if there are
			
 
				+ * 64-bit arguments in the record).
			
 
				+ *
			
 
				+ * Word
			
 
				+ * 0    bit 31 30|29 28|27 26|25 24|23 22|21 20|19 ... 0
			
 
				+ *          A5   |A4   |A3   |A2   |A1   |A0   |Hypercall op
			
 
				+ * 1    First 32 bit (or low word of first 64 bit) arg in record
			
 
				+ * 2    Second 32 bit (or high word of first 64 bit) arg in record
			
 
				+ * ...
			
 
				+ *
			
 
				+ * A0-A5 bitfield values:
			
 
				+ *
			
 
				+ *   00b  Argument not present
			
 
				+ *   01b  32-bit argument present
			
 
				+ *   10b  64-bit argument present
			
 
				+ *   11b  Reserved
			
 
				+ */
			
 
				+#define TRC_PV_HYPERCALL_V2_ARG_32(i) (0x1u << (20 + 2*(i))) /* 01b in field A<i>; unsigned so the shift is well defined */
			
 
				+#define TRC_PV_HYPERCALL_V2_ARG_64(i) (0x2u << (20 + 2*(i))) /* 10b in field A<i>; i == 5 sets bit 31 */
			
 
				+#define TRC_PV_HYPERCALL_V2_ARG_MASK  (0xfff00000) /* bits 31..20, i.e. fields A5..A0 */
			
 
				+
			
 
				+#define TRC_SHADOW_NOT_SHADOW                 (TRC_SHADOW +  1)
			
 
				+#define TRC_SHADOW_FAST_PROPAGATE             (TRC_SHADOW +  2)
			
 
				+#define TRC_SHADOW_FAST_MMIO                  (TRC_SHADOW +  3)
			
 
				+#define TRC_SHADOW_FALSE_FAST_PATH            (TRC_SHADOW +  4)
			
 
				+#define TRC_SHADOW_MMIO                       (TRC_SHADOW +  5)
			
 
				+#define TRC_SHADOW_FIXUP                      (TRC_SHADOW +  6)
			
 
				+#define TRC_SHADOW_DOMF_DYING                 (TRC_SHADOW +  7)
			
 
				+#define TRC_SHADOW_EMULATE                    (TRC_SHADOW +  8)
			
 
				+#define TRC_SHADOW_EMULATE_UNSHADOW_USER      (TRC_SHADOW +  9)
			
 
				+#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ    (TRC_SHADOW + 10)
			
 
				+#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
			
 
				+#define TRC_SHADOW_WRMAP_BF                   (TRC_SHADOW + 12)
			
 
				+#define TRC_SHADOW_PREALLOC_UNPIN             (TRC_SHADOW + 13)
			
 
				+#define TRC_SHADOW_RESYNC_FULL                (TRC_SHADOW + 14)
			
 
				+#define TRC_SHADOW_RESYNC_ONLY                (TRC_SHADOW + 15)
			
 
				+
			
 
				+/* trace events per subclass */
			
 
				+#define TRC_HVM_NESTEDFLAG      (0x400)
			
 
				+#define TRC_HVM_VMENTRY         (TRC_HVM_ENTRYEXIT + 0x01)
			
 
				+#define TRC_HVM_VMEXIT          (TRC_HVM_ENTRYEXIT + 0x02)
			
 
				+#define TRC_HVM_VMEXIT64        (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02)
			
 
				+#define TRC_HVM_PF_XEN          (TRC_HVM_HANDLER + 0x01)
			
 
				+#define TRC_HVM_PF_XEN64        (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01)
			
 
				+#define TRC_HVM_PF_INJECT       (TRC_HVM_HANDLER + 0x02)
			
 
				+#define TRC_HVM_PF_INJECT64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02)
			
 
				+#define TRC_HVM_INJ_EXC         (TRC_HVM_HANDLER + 0x03)
			
 
				+#define TRC_HVM_INJ_VIRQ        (TRC_HVM_HANDLER + 0x04)
			
 
				+#define TRC_HVM_REINJ_VIRQ      (TRC_HVM_HANDLER + 0x05)
			
 
				+#define TRC_HVM_IO_READ         (TRC_HVM_HANDLER + 0x06)
			
 
				+#define TRC_HVM_IO_WRITE        (TRC_HVM_HANDLER + 0x07)
			
 
				+#define TRC_HVM_CR_READ         (TRC_HVM_HANDLER + 0x08)
			
 
				+#define TRC_HVM_CR_READ64       (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08)
			
 
				+#define TRC_HVM_CR_WRITE        (TRC_HVM_HANDLER + 0x09)
			
 
				+#define TRC_HVM_CR_WRITE64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09)
			
 
				+#define TRC_HVM_DR_READ         (TRC_HVM_HANDLER + 0x0A)
			
 
				+#define TRC_HVM_DR_WRITE        (TRC_HVM_HANDLER + 0x0B)
			
 
				+#define TRC_HVM_MSR_READ        (TRC_HVM_HANDLER + 0x0C)
			
 
				+#define TRC_HVM_MSR_WRITE       (TRC_HVM_HANDLER + 0x0D)
			
 
				+#define TRC_HVM_CPUID           (TRC_HVM_HANDLER + 0x0E)
			
 
				+#define TRC_HVM_INTR            (TRC_HVM_HANDLER + 0x0F)
			
 
				+#define TRC_HVM_NMI             (TRC_HVM_HANDLER + 0x10)
			
 
				+#define TRC_HVM_SMI             (TRC_HVM_HANDLER + 0x11)
			
 
				+#define TRC_HVM_VMMCALL         (TRC_HVM_HANDLER + 0x12)
			
 
				+#define TRC_HVM_HLT             (TRC_HVM_HANDLER + 0x13)
			
 
				+#define TRC_HVM_INVLPG          (TRC_HVM_HANDLER + 0x14)
			
 
				+#define TRC_HVM_INVLPG64        (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14)
			
 
				+#define TRC_HVM_MCE             (TRC_HVM_HANDLER + 0x15)
			
 
				+#define TRC_HVM_IOPORT_READ     (TRC_HVM_HANDLER + 0x16)
			
 
				+#define TRC_HVM_IOMEM_READ      (TRC_HVM_HANDLER + 0x17)
			
 
				+#define TRC_HVM_CLTS            (TRC_HVM_HANDLER + 0x18)
			
 
				+#define TRC_HVM_LMSW            (TRC_HVM_HANDLER + 0x19)
			
 
				+#define TRC_HVM_LMSW64          (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
			
 
				+#define TRC_HVM_RDTSC           (TRC_HVM_HANDLER + 0x1a)
			
 
				+#define TRC_HVM_INTR_WINDOW     (TRC_HVM_HANDLER + 0x20)
			
 
				+#define TRC_HVM_NPF             (TRC_HVM_HANDLER + 0x21)
			
 
				+#define TRC_HVM_REALMODE_EMULATE (TRC_HVM_HANDLER + 0x22)
			
 
				+#define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
			
 
				+#define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
			
 
				+#define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
			
 
				+#define TRC_HVM_XCR_READ64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x26)
			
 
				+#define TRC_HVM_XCR_WRITE64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x27)
			
 
				+
			
 
				+#define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
			
 
				+#define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
			
 
				+
			
 
				+/* Trace events for emulated devices */
			
 
				+#define TRC_HVM_EMUL_HPET_START_TIMER  (TRC_HVM_EMUL + 0x1)
			
 
				+#define TRC_HVM_EMUL_PIT_START_TIMER   (TRC_HVM_EMUL + 0x2)
			
 
				+#define TRC_HVM_EMUL_RTC_START_TIMER   (TRC_HVM_EMUL + 0x3)
			
 
				+#define TRC_HVM_EMUL_LAPIC_START_TIMER (TRC_HVM_EMUL + 0x4)
			
 
				+#define TRC_HVM_EMUL_HPET_STOP_TIMER   (TRC_HVM_EMUL + 0x5)
			
 
				+#define TRC_HVM_EMUL_PIT_STOP_TIMER    (TRC_HVM_EMUL + 0x6)
			
 
				+#define TRC_HVM_EMUL_RTC_STOP_TIMER    (TRC_HVM_EMUL + 0x7)
			
 
				+#define TRC_HVM_EMUL_LAPIC_STOP_TIMER  (TRC_HVM_EMUL + 0x8)
			
 
				+#define TRC_HVM_EMUL_PIT_TIMER_CB      (TRC_HVM_EMUL + 0x9)
			
 
				+#define TRC_HVM_EMUL_LAPIC_TIMER_CB    (TRC_HVM_EMUL + 0xA)
			
 
				+#define TRC_HVM_EMUL_PIC_INT_OUTPUT    (TRC_HVM_EMUL + 0xB)
			
 
				+#define TRC_HVM_EMUL_PIC_KICK          (TRC_HVM_EMUL + 0xC)
			
 
				+#define TRC_HVM_EMUL_PIC_INTACK        (TRC_HVM_EMUL + 0xD)
			
 
				+#define TRC_HVM_EMUL_PIC_POSEDGE       (TRC_HVM_EMUL + 0xE)
			
 
				+#define TRC_HVM_EMUL_PIC_NEGEDGE       (TRC_HVM_EMUL + 0xF)
			
 
				+#define TRC_HVM_EMUL_PIC_PEND_IRQ_CALL (TRC_HVM_EMUL + 0x10)
			
 
				+#define TRC_HVM_EMUL_LAPIC_PIC_INTR    (TRC_HVM_EMUL + 0x11)
			
 
				+
			
 
				+/* trace events per class */
			
 
				+#define TRC_PM_FREQ_CHANGE      (TRC_HW_PM + 0x01)
			
 
				+#define TRC_PM_IDLE_ENTRY       (TRC_HW_PM + 0x02)
			
 
				+#define TRC_PM_IDLE_EXIT        (TRC_HW_PM + 0x03)
			
 
				+
			
 
				+/* Trace events for IRQs */
			
 
				+#define TRC_HW_IRQ_MOVE_CLEANUP_DELAY (TRC_HW_IRQ + 0x1)
			
 
				+#define TRC_HW_IRQ_MOVE_CLEANUP       (TRC_HW_IRQ + 0x2)
			
 
				+#define TRC_HW_IRQ_BIND_VECTOR        (TRC_HW_IRQ + 0x3)
			
 
				+#define TRC_HW_IRQ_CLEAR_VECTOR       (TRC_HW_IRQ + 0x4)
			
 
				+#define TRC_HW_IRQ_MOVE_FINISH        (TRC_HW_IRQ + 0x5)
			
 
				+#define TRC_HW_IRQ_ASSIGN_VECTOR      (TRC_HW_IRQ + 0x6)
			
 
				+#define TRC_HW_IRQ_UNMAPPED_VECTOR    (TRC_HW_IRQ + 0x7)
			
 
				+#define TRC_HW_IRQ_HANDLED            (TRC_HW_IRQ + 0x8)
			
 
				+
			
 
				+/*
			
 
				+ * Event Flags
			
 
				+ *
			
 
				+ * Some events (e.g., TRC_PV_TRAP and TRC_HVM_IOMEM_READ) have multiple
			
 
				+ * record formats.  These event flags distinguish between the
			
 
				+ * different formats.
			
 
				+ */
			
 
				+#define TRC_64_FLAG 0x100 /* Addresses are 64 bits (instead of 32 bits) */
			
 
				+
			
 
				+/* This structure represents a single trace buffer record. */
			
 
				+struct t_rec {
			
 
				+    uint32_t event:28;            /* event code (28 bits, cf. TRC_HD_TO_EVENT) */
			
 
				+    uint32_t extra_u32:3;         /* # entries in trailing extra_u32[] array */
			
 
				+    uint32_t cycles_included:1;   /* u.cycles or u.nocycles? */
			
 
				+    union {
			
 
				+        struct {
			
 
				+            uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
			
 
				+            uint32_t extra_u32[7];         /* event data items */
			
 
				+        } cycles;
			
 
				+        struct {
			
 
				+            uint32_t extra_u32[7];         /* event data items */
			
 
				+        } nocycles;
			
 
				+    } u;
			
 
				+};
			
 
				+
			
 
				+/*
			
 
				+ * This structure contains the metadata for a single trace buffer.  The cons
			
 
				+ * and prod fields are offsets of the next record to consume / produce.
			
 
				+ */
			
 
				+struct t_buf {
			
 
				+    /* Assume the data buffer size is X.  X is generally not a power of 2.
			
 
				+     * CONS and PROD are incremented modulo (2*X):
			
 
				+     *     0 <= cons < 2*X
			
 
				+     *     0 <= prod < 2*X
			
 
				+     * This is done because addition modulo X breaks at 2^32 when X is not a
			
 
				+     * power of 2:
			
 
				+     *     (((2^32 - 1) % X) + 1) % X != (2^32) % X
			
 
				+     */
			
 
				+    uint32_t cons;   /* Offset of next item to be consumed by control tools. */
			
 
				+    uint32_t prod;   /* Offset of next item to be produced by Xen.           */
			
 
				+    /*  Records follow immediately after the meta-data header.    */
			
 
				+};
			
 
				+
			
 
				+/* Structure used to pass MFNs to the trace buffers back to trace consumers.
			
 
				+ * Offset is an offset into the mapped structure where the mfn list will be held.
			
 
				+ * MFNs will be at ((unsigned long *)(t_info))+(t_info->mfn_offset[cpu]).
			
 
				+ */
			
 
				+struct t_info {
			
 
				+    uint16_t tbuf_size; /* Size in pages of each trace buffer */
			
 
				+    uint16_t mfn_offset[];  /* Offset within t_info structure of the page list per cpu */
			
 
				+    /* MFN lists immediately after the header */
			
 
				+};
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_TRACE_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/vcpu.h
+++ b/include/hw/xen/interface/vcpu.h
@@ -0,0 +1,248 @@
 
				+/******************************************************************************
			
 
				+ * vcpu.h
			
 
				+ *
			
 
				+ * VCPU initialisation, query, and hotplug.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_VCPU_H__
			
 
				+#define __XEN_PUBLIC_VCPU_H__
			
 
				+
			
 
				+#include "xen.h"
			
 
				+
			
 
				+/*
			
 
				+ * Prototype for this hypercall is:
			
 
				+ *  long vcpu_op(int cmd, unsigned int vcpuid, void *extra_args)
			
 
				+ * @cmd        == VCPUOP_??? (VCPU operation).
			
 
				+ * @vcpuid     == VCPU to operate on.
			
 
				+ * @extra_args == Operation-specific extra arguments (NULL if none).
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Initialise a VCPU. Each VCPU can be initialised only once. A
			
 
				+ * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
			
 
				+ *
			
 
				+ * @extra_arg == For PV or ARM guests this is a pointer to a vcpu_guest_context
			
 
				+ *               structure containing the initial state for the VCPU. For x86
			
 
				+ *               HVM based guests this is a pointer to a vcpu_hvm_context
			
 
				+ *               structure.
			
 
				+ */
			
 
				+#define VCPUOP_initialise            0
			
 
				+
			
 
				+/*
			
 
				+ * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
			
 
				+ * if the VCPU has not been initialised (VCPUOP_initialise).
			
 
				+ */
			
 
				+#define VCPUOP_up                    1
			
 
				+
			
 
				+/*
			
 
				+ * Bring down a VCPU (i.e., make it non-runnable).
			
 
				+ * There are a few caveats that callers should observe:
			
 
				+ *  1. This operation may return, and VCPUOP_is_up may return false, before the
			
 
				+ *     VCPU stops running (i.e., the command is asynchronous). It is a good
			
 
				+ *     idea to ensure that the VCPU has entered a non-critical loop before
			
 
				+ *     bringing it down. Alternatively, this operation is guaranteed
			
 
				+ *     synchronous if invoked by the VCPU itself.
			
 
				+ *  2. After a VCPU is initialised, there is currently no way to drop all its
			
 
				+ *     references to domain memory. Even a VCPU that is down still holds
			
 
				+ *     memory references via its pagetable base pointer and GDT. It is good
			
 
				+ *     practice to move a VCPU onto an 'idle' or default page table, LDT and
			
 
				+ *     GDT before bringing it down.
			
 
				+ */
			
 
				+#define VCPUOP_down                  2
			
 
				+
			
 
				+/* Returns 1 if the given VCPU is up. */
			
 
				+#define VCPUOP_is_up                 3
			
 
				+
			
 
				+/*
			
 
				+ * Return information about the state and running time of a VCPU.
			
 
				+ * @extra_arg == pointer to vcpu_runstate_info structure.
			
 
				+ */
			
 
				+#define VCPUOP_get_runstate_info     4
			
 
				+struct vcpu_runstate_info {
			
 
				+    /* VCPU's current state (RUNSTATE_*). */
			
 
				+    int      state;
			
 
				+    /* When was current state entered (system time, ns)? */
			
 
				+    uint64_t state_entry_time;
			
 
				+    /*
			
 
				+     * Update indicator set in state_entry_time:
			
 
				+     * When activated via VMASST_TYPE_runstate_update_flag, set during
			
 
				+     * updates in guest memory mapped copy of vcpu_runstate_info.
			
 
				+     */
			
 
				+#define XEN_RUNSTATE_UPDATE          (xen_mk_ullong(1) << 63)
			
 
				+    /*
			
 
				+     * Time spent in each RUNSTATE_* (ns). The sum of these times is
			
 
				+     * guaranteed not to drift from system time.
			
 
				+     */
			
 
				+    uint64_t time[4];
			
 
				+};
			
 
				+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);
			
 
				+
			
 
				+/* VCPU is currently running on a physical CPU. */
			
 
				+#define RUNSTATE_running  0
			
 
				+
			
 
				+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
			
 
				+#define RUNSTATE_runnable 1
			
 
				+
			
 
				+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
			
 
				+#define RUNSTATE_blocked  2
			
 
				+
			
 
				+/*
			
 
				+ * VCPU is not runnable, but it is not blocked.
			
 
				+ * This is a 'catch all' state for things like hotplug and pauses by the
			
 
				+ * system administrator (or for critical sections in the hypervisor).
			
 
				+ * RUNSTATE_blocked dominates this state (it is the preferred state).
			
 
				+ */
			
 
				+#define RUNSTATE_offline  3
			
 
				+
			
 
				+/*
			
 
				+ * Register a shared memory area from which the guest may obtain its own
			
 
				+ * runstate information without needing to execute a hypercall.
			
 
				+ * Notes:
			
 
				+ *  1. The registered address may be virtual or physical or guest handle,
			
 
				+ *     depending on the platform. Virtual address or guest handle should be
			
 
				+ *     registered on x86 systems.
			
 
				+ *  2. Only one shared area may be registered per VCPU. The shared area is
			
 
				+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
			
 
				+ *     runstate.state will always be RUNSTATE_running and
			
 
				+ *     runstate.state_entry_time will indicate the system time at which the
			
 
				+ *     VCPU was last scheduled to run.
			
 
				+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
			
 
				+ */
			
 
				+#define VCPUOP_register_runstate_memory_area 5
			
 
				+struct vcpu_register_runstate_memory_area {
			
 
				+    union {
			
 
				+        XEN_GUEST_HANDLE(vcpu_runstate_info_t) h;
			
 
				+        struct vcpu_runstate_info *v;
			
 
				+        uint64_t p;
			
 
				+    } addr;
			
 
				+};
			
 
				+typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t);
			
 
				+
			
 
				+/*
			
 
				+ * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
			
 
				+ * which can be set via these commands. Periods smaller than one millisecond
			
 
				+ * may not be supported.
			
 
				+ */
			
 
				+#define VCPUOP_set_periodic_timer    6 /* arg == vcpu_set_periodic_timer_t */
			
 
				+#define VCPUOP_stop_periodic_timer   7 /* arg == NULL */
			
 
				+struct vcpu_set_periodic_timer {
			
 
				+    uint64_t period_ns;
			
 
				+};
			
 
				+typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
			
 
				+
			
 
				+/*
			
 
				+ * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
			
 
				+ * timer which can be set via these commands.
			
 
				+ */
			
 
				+#define VCPUOP_set_singleshot_timer  8 /* arg == vcpu_set_singleshot_timer_t */
			
 
				+#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
			
 
				+struct vcpu_set_singleshot_timer {
			
 
				+    uint64_t timeout_abs_ns;   /* Absolute system time value in nanoseconds. */
			
 
				+    uint32_t flags;            /* VCPU_SSHOTTMR_??? */
			
 
				+};
			
 
				+typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
			
 
				+
			
 
				+/* Flags to VCPUOP_set_singleshot_timer. */
			
 
				+ /* Require the timeout to be in the future (return -ETIME if it's passed). */
			
 
				+#define _VCPU_SSHOTTMR_future (0)
			
 
				+#define VCPU_SSHOTTMR_future  (1U << _VCPU_SSHOTTMR_future)
			
 
				+
			
 
				+/*
			
 
				+ * Register a memory location in the guest address space for the
			
 
				+ * vcpu_info structure.  This allows the guest to place the vcpu_info
			
 
				+ * structure in a convenient place, such as in a per-cpu data area.
			
 
				+ * The pointer need not be page aligned, but the structure must not
			
 
				+ * cross a page boundary.
			
 
				+ *
			
 
				+ * This may be called only once per vcpu.
			
 
				+ */
			
 
				+#define VCPUOP_register_vcpu_info   10  /* arg == vcpu_register_vcpu_info_t */
			
 
				+struct vcpu_register_vcpu_info {
			
 
				+    uint64_t mfn;    /* mfn of page to place vcpu_info */
			
 
				+    uint32_t offset; /* offset within page */
			
 
				+    uint32_t rsvd;   /* unused */
			
 
				+};
			
 
				+typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
			
 
				+
			
 
				+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
			
 
				+#define VCPUOP_send_nmi             11
			
 
				+
			
 
				+/*
			
 
				+ * Get the physical ID information for a pinned vcpu's underlying physical
			
 
				+ * processor.  The physical ID information is architecture-specific.
			
 
				+ * On x86: id[31:0]=apic_id, id[63:32]=acpi_id.
			
 
				+ * This command returns -EINVAL if it is not a valid operation for this VCPU.
			
 
				+ */
			
 
				+#define VCPUOP_get_physid           12 /* arg == vcpu_get_physid_t */
			
 
				+struct vcpu_get_physid {
			
 
				+    uint64_t phys_id;
			
 
				+};
			
 
				+typedef struct vcpu_get_physid vcpu_get_physid_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t);
			
 
				+#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid))
			
 
				+#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32))
			
 
				+
			
 
				+/*
			
 
				+ * Register a memory location to get a secondary copy of the vcpu time
			
 
				+ * parameters.  The master copy still exists as part of the vcpu shared
			
 
				+ * memory area, and this secondary copy is updated whenever the master copy
			
 
				+ * is updated (and using the same versioning scheme for synchronisation).
			
 
				+ *
			
 
				+ * The intent is that this copy may be mapped (RO) into userspace so
			
 
				+ * that usermode can compute system time using the time info and the
			
 
				+ * tsc.  Usermode will see an array of vcpu_time_info structures, one
			
 
				+ * for each vcpu, and choose the right one by an existing mechanism
			
 
				+ * which allows it to get the current vcpu number (such as via a
			
 
				+ * segment limit).  It can then apply the normal algorithm to compute
			
 
				+ * system time from the tsc.
			
 
				+ *
			
 
				+ * @extra_arg == pointer to vcpu_register_time_memory_area structure.
			
 
				+ */
			
 
				+#define VCPUOP_register_vcpu_time_memory_area   13
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t);
			
 
				+struct vcpu_register_time_memory_area {
			
 
				+    union {
			
 
				+        XEN_GUEST_HANDLE(vcpu_time_info_t) h;
			
 
				+        struct vcpu_time_info *v;
			
 
				+        uint64_t p;
			
 
				+    } addr;
			
 
				+};
			
 
				+typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_VCPU_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/version.h
+++ b/include/hw/xen/interface/version.h
@@ -0,0 +1,113 @@
 
				+/******************************************************************************
			
 
				+ * version.h
			
 
				+ *
			
 
				+ * Xen version, type, and compile information.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com>
			
 
				+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_VERSION_H__
			
 
				+#define __XEN_PUBLIC_VERSION_H__
			
 
				+
			
 
				+#include "xen.h"
			
 
				+
			
 
				+/* NB. All ops return zero on success, except
			
 
				+ * XENVER_{version,pagesize,build_id} */
			
 
				+
			
 
				+/* arg == NULL; returns major:minor (16:16). */
			
 
				+#define XENVER_version      0
			
 
				+
			
 
				+/* arg == xen_extraversion_t. */
			
 
				+#define XENVER_extraversion 1
			
 
				+typedef char xen_extraversion_t[16];
			
 
				+#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))
			
 
				+
			
 
				+/* arg == xen_compile_info_t. */
			
 
				+#define XENVER_compile_info 2
			
 
				+struct xen_compile_info {
			
 
				+    char compiler[64];
			
 
				+    char compile_by[16];
			
 
				+    char compile_domain[32];
			
 
				+    char compile_date[32];
			
 
				+};
			
 
				+typedef struct xen_compile_info xen_compile_info_t;
			
 
				+
			
 
				+#define XENVER_capabilities 3
			
 
				+typedef char xen_capabilities_info_t[1024];
			
 
				+#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))
			
 
				+
			
 
				+#define XENVER_changeset 4
			
 
				+typedef char xen_changeset_info_t[64];
			
 
				+#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
			
 
				+
			
 
				+#define XENVER_platform_parameters 5
			
 
				+struct xen_platform_parameters {
			
 
				+    xen_ulong_t virt_start;
			
 
				+};
			
 
				+typedef struct xen_platform_parameters xen_platform_parameters_t;
			
 
				+
			
 
				+#define XENVER_get_features 6
			
 
				+struct xen_feature_info {
			
 
				+    unsigned int submap_idx;    /* IN: which 32-bit submap to return */
			
 
				+    uint32_t     submap;        /* OUT: 32-bit submap */
			
 
				+};
			
 
				+typedef struct xen_feature_info xen_feature_info_t;
			
 
				+
			
 
				+/* Declares the features reported by XENVER_get_features. */
			
 
				+#include "features.h"
			
 
				+
			
 
				+/* arg == NULL; returns host memory page size. */
			
 
				+#define XENVER_pagesize 7
			
 
				+
			
 
				+/* arg == xen_domain_handle_t.
			
 
				+ *
			
 
				+ * The toolstack fills it out for guest consumption. It is intended to hold
			
 
				+ * the UUID of the guest.
			
 
				+ */
			
 
				+#define XENVER_guest_handle 8
			
 
				+
			
 
				+#define XENVER_commandline 9
			
 
				+typedef char xen_commandline_t[1024];
			
 
				+
			
 
				+/*
			
 
				+ * Return value is the number of bytes written, or XEN_Exx on error.
			
 
				+ * Calling with empty parameter returns the size of build_id.
			
 
				+ */
			
 
				+#define XENVER_build_id 10
			
 
				+struct xen_build_id {
			
 
				+        uint32_t        len; /* IN: size of buf[]. */
			
 
				+        unsigned char   buf[XEN_FLEX_ARRAY_DIM];
			
 
				+                             /* OUT: Variable length buffer with build_id. */
			
 
				+};
			
 
				+typedef struct xen_build_id xen_build_id_t;
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_VERSION_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/interface/xen-compat.h
+++ b/include/hw/xen/interface/xen-compat.h
@@ -0,0 +1,46 @@
 
				+/******************************************************************************
			
 
				+ * xen-compat.h
			
 
				+ *
			
 
				+ * Guest OS interface to Xen.  Compatibility layer.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2006, Christian Limpach
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
			
 
				+#define __XEN_PUBLIC_XEN_COMPAT_H__
			
 
				+
			
 
				+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040e00
			
 
				+
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+/* Xen is built with matching headers and implements the latest interface. */
			
 
				+#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__
			
 
				+#elif !defined(__XEN_INTERFACE_VERSION__)
			
 
				+/* Guests which do not specify a version get the legacy interface. */
			
 
				+#define __XEN_INTERFACE_VERSION__ 0x00000000
			
 
				+#endif
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__
			
 
				+#error "These header files do not support the requested interface version."
			
 
				+#endif
			
 
				+
			
 
				+#define COMPAT_FLEX_ARRAY_DIM XEN_FLEX_ARRAY_DIM
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */
			
--- a/include/hw/xen/interface/xen.h
+++ b/include/hw/xen/interface/xen.h
@@ -0,0 +1,1049 @@
 
				+/******************************************************************************
			
 
				+ * xen.h
			
 
				+ *
			
 
				+ * Guest OS interface to Xen.
			
 
				+ *
			
 
				+ * Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+ * of this software and associated documentation files (the "Software"), to
			
 
				+ * deal in the Software without restriction, including without limitation the
			
 
				+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
			
 
				+ * sell copies of the Software, and to permit persons to whom the Software is
			
 
				+ * furnished to do so, subject to the following conditions:
			
 
				+ *
			
 
				+ * The above copyright notice and this permission notice shall be included in
			
 
				+ * all copies or substantial portions of the Software.
			
 
				+ *
			
 
				+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
			
 
				+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
			
 
				+ * DEALINGS IN THE SOFTWARE.
			
 
				+ *
			
 
				+ * Copyright (c) 2004, K A Fraser
			
 
				+ */
			
 
				+
			
 
				+#ifndef __XEN_PUBLIC_XEN_H__
			
 
				+#define __XEN_PUBLIC_XEN_H__
			
 
				+
			
 
				+#include "xen-compat.h"
			
 
				+
			
 
				+#if defined(__i386__) || defined(__x86_64__)
			
 
				+#include "arch-x86/xen.h"
			
 
				+#elif defined(__arm__) || defined (__aarch64__)
			
 
				+#include "arch-arm.h"
			
 
				+#else
			
 
				+#error "Unsupported architecture"
			
 
				+#endif
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+/* Guest handles for primitive C types. */
			
 
				+DEFINE_XEN_GUEST_HANDLE(char);
			
 
				+__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
			
 
				+DEFINE_XEN_GUEST_HANDLE(int);
			
 
				+__DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040300
			
 
				+DEFINE_XEN_GUEST_HANDLE(long);
			
 
				+__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
			
 
				+#endif
			
 
				+DEFINE_XEN_GUEST_HANDLE(void);
			
 
				+
			
 
				+DEFINE_XEN_GUEST_HANDLE(uint64_t);
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
			
 
				+DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
			
 
				+
			
 
				+/* Define a variable length array (depends on compiler). */
			
 
				+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
			
 
				+#define XEN_FLEX_ARRAY_DIM
			
 
				+#elif defined(__GNUC__)
			
 
				+#define XEN_FLEX_ARRAY_DIM  0
			
 
				+#else
			
 
				+#define XEN_FLEX_ARRAY_DIM  1 /* variable size */
			
 
				+#endif
			
 
				+
			
 
				+/* Turn a plain number into a C unsigned (long (long)) constant. */
			
 
				+#define __xen_mk_uint(x)  x ## U
			
 
				+#define __xen_mk_ulong(x) x ## UL
			
 
				+#ifndef __xen_mk_ullong
			
 
				+# define __xen_mk_ullong(x) x ## ULL
			
 
				+#endif
			
 
				+#define xen_mk_uint(x)    __xen_mk_uint(x)
			
 
				+#define xen_mk_ulong(x)   __xen_mk_ulong(x)
			
 
				+#define xen_mk_ullong(x)  __xen_mk_ullong(x)
			
 
				+
			
 
				+#else
			
 
				+
			
 
				+/* In assembly code we cannot use C numeric constant suffixes. */
			
 
				+#define xen_mk_uint(x)   x
			
 
				+#define xen_mk_ulong(x)  x
			
 
				+#define xen_mk_ullong(x) x
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * HYPERCALLS
			
 
				+ */
			
 
				+
			
 
				+/* `incontents 100 hcalls List of hypercalls
			
 
				+ * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*()
			
 
				+ */
			
 
				+
			
 
				+#define __HYPERVISOR_set_trap_table        0
			
 
				+#define __HYPERVISOR_mmu_update            1
			
 
				+#define __HYPERVISOR_set_gdt               2
			
 
				+#define __HYPERVISOR_stack_switch          3
			
 
				+#define __HYPERVISOR_set_callbacks         4
			
 
				+#define __HYPERVISOR_fpu_taskswitch        5
			
 
				+#define __HYPERVISOR_sched_op_compat       6 /* compat since 0x00030101 */
			
 
				+#define __HYPERVISOR_platform_op           7
			
 
				+#define __HYPERVISOR_set_debugreg          8
			
 
				+#define __HYPERVISOR_get_debugreg          9
			
 
				+#define __HYPERVISOR_update_descriptor    10
			
 
				+#define __HYPERVISOR_memory_op            12
			
 
				+#define __HYPERVISOR_multicall            13
			
 
				+#define __HYPERVISOR_update_va_mapping    14
			
 
				+#define __HYPERVISOR_set_timer_op         15
			
 
				+#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */
			
 
				+#define __HYPERVISOR_xen_version          17
			
 
				+#define __HYPERVISOR_console_io           18
			
 
				+#define __HYPERVISOR_physdev_op_compat    19 /* compat since 0x00030202 */
			
 
				+#define __HYPERVISOR_grant_table_op       20
			
 
				+#define __HYPERVISOR_vm_assist            21
			
 
				+#define __HYPERVISOR_update_va_mapping_otherdomain 22
			
 
				+#define __HYPERVISOR_iret                 23 /* x86 only */
			
 
				+#define __HYPERVISOR_vcpu_op              24
			
 
				+#define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
			
 
				+#define __HYPERVISOR_mmuext_op            26
			
 
				+#define __HYPERVISOR_xsm_op               27
			
 
				+#define __HYPERVISOR_nmi_op               28
			
 
				+#define __HYPERVISOR_sched_op             29
			
 
				+#define __HYPERVISOR_callback_op          30
			
 
				+#define __HYPERVISOR_xenoprof_op          31
			
 
				+#define __HYPERVISOR_event_channel_op     32
			
 
				+#define __HYPERVISOR_physdev_op           33
			
 
				+#define __HYPERVISOR_hvm_op               34
			
 
				+#define __HYPERVISOR_sysctl               35
			
 
				+#define __HYPERVISOR_domctl               36
			
 
				+#define __HYPERVISOR_kexec_op             37
			
 
				+#define __HYPERVISOR_tmem_op              38
			
 
				+#define __HYPERVISOR_argo_op              39
			
 
				+#define __HYPERVISOR_xenpmu_op            40
			
 
				+#define __HYPERVISOR_dm_op                41
			
 
				+#define __HYPERVISOR_hypfs_op             42
			
 
				+
			
 
				+/* Architecture-specific hypercall definitions. */
			
 
				+#define __HYPERVISOR_arch_0               48
			
 
				+#define __HYPERVISOR_arch_1               49
			
 
				+#define __HYPERVISOR_arch_2               50
			
 
				+#define __HYPERVISOR_arch_3               51
			
 
				+#define __HYPERVISOR_arch_4               52
			
 
				+#define __HYPERVISOR_arch_5               53
			
 
				+#define __HYPERVISOR_arch_6               54
			
 
				+#define __HYPERVISOR_arch_7               55
			
 
				+
			
 
				+/* ` } */
			
 
				+
			
 
				+/*
			
 
				+ * HYPERCALL COMPATIBILITY.
			
 
				+ */
			
 
				+
			
 
				+/* New sched_op hypercall introduced in 0x00030101. */
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00030101
			
 
				+#undef __HYPERVISOR_sched_op
			
 
				+#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat
			
 
				+#endif
			
 
				+
			
 
				+/* New event-channel and physdev hypercalls introduced in 0x00030202. */
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00030202
			
 
				+#undef __HYPERVISOR_event_channel_op
			
 
				+#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat
			
 
				+#undef __HYPERVISOR_physdev_op
			
 
				+#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat
			
 
				+#endif
			
 
				+
			
 
				+/* New platform_op hypercall introduced in 0x00030204. */
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00030204
			
 
				+#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * VIRTUAL INTERRUPTS
			
 
				+ *
			
 
				+ * Virtual interrupts that a guest OS may receive from Xen.
			
 
				+ *
			
 
				+ * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
			
 
				+ * global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
			
 
				+ * The latter can be allocated only once per guest: they must initially be
			
 
				+ * allocated to VCPU0 but can subsequently be re-bound.
			
 
				+ */
			
 
				+/* ` enum virq { */
			
 
				+#define VIRQ_TIMER      0  /* V. Timebase update, and/or requested timeout.  */
			
 
				+#define VIRQ_DEBUG      1  /* V. Request guest to dump debug info.           */
			
 
				+#define VIRQ_CONSOLE    2  /* G. (DOM0) Bytes received on emergency console. */
			
 
				+#define VIRQ_DOM_EXC    3  /* G. (DOM0) Exceptional event for some domain.   */
			
 
				+#define VIRQ_TBUF       4  /* G. (DOM0) Trace buffer has records available.  */
			
 
				+#define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
			
 
				+#define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */
			
 
				+#define VIRQ_CON_RING   8  /* G. (DOM0) Bytes received on console            */
			
 
				+#define VIRQ_PCPU_STATE 9  /* G. (DOM0) PCPU state changed                   */
			
 
				+#define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occurred          */
			
 
				+#define VIRQ_ARGO       11 /* G. Argo interdomain message notification       */
			
 
				+#define VIRQ_ENOMEM     12 /* G. (DOM0) Low on heap memory       */
			
 
				+#define VIRQ_XENPMU     13 /* V.  PMC interrupt                              */
			
 
				+
			
 
				+/* Architecture-specific VIRQ definitions. */
			
 
				+#define VIRQ_ARCH_0    16
			
 
				+#define VIRQ_ARCH_1    17
			
 
				+#define VIRQ_ARCH_2    18
			
 
				+#define VIRQ_ARCH_3    19
			
 
				+#define VIRQ_ARCH_4    20
			
 
				+#define VIRQ_ARCH_5    21
			
 
				+#define VIRQ_ARCH_6    22
			
 
				+#define VIRQ_ARCH_7    23
			
 
				+/* ` } */
			
 
				+
			
 
				+#define NR_VIRQS       24
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[],
			
 
				+ * `                       unsigned count, unsigned *done_out,
			
 
				+ * `                       unsigned foreigndom)
			
 
				+ * `
			
 
				+ * @reqs is an array of mmu_update_t structures ((ptr, val) pairs).
			
 
				+ * @count is the length of the above array.
			
 
				+ * @done_out is an output parameter indicating number of completed operations
			
 
				+ * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this
			
 
				+ *                    hypercall invocation. Can be DOMID_SELF.
			
 
				+ * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced
			
 
				+ *                     in this hypercall invocation. The value of this field
			
 
				+ *                     (x) encodes the PFD as follows:
			
 
				+ *                     x == 0 => PFD == DOMID_SELF
			
 
				+ *                     x != 0 => PFD == x - 1
			
 
				+ *
			
 
				+ * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command.
			
 
				+ * -------------
			
 
				+ * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
			
 
				+ * Updates an entry in a page table belonging to PFD. If updating an L1 table,
			
 
				+ * and the new table entry is valid/present, the mapped frame must belong to
			
 
				+ * FD. If attempting to map an I/O page then the caller assumes the privilege
			
 
				+ * of the FD.
			
 
				+ * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
			
 
				+ * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
			
 
				+ * ptr[:2]  -- Machine address of the page-table entry to modify.
			
 
				+ * val      -- Value to write.
			
 
				+ *
			
 
				+ * There are also certain implicit requirements when using this hypercall. The
			
 
				+ * pages that make up a pagetable must be mapped read-only in the guest.
			
 
				+ * This prevents uncontrolled guest updates to the pagetable. Xen strictly
			
 
				+ * enforces this, and will disallow any pagetable update which will end up
			
 
				+ * mapping pagetable page RW, and will disallow using any writable page as a
			
 
				+ * pagetable. In practice it means that when constructing a page table for a
			
 
				+ * process, thread, etc, we MUST be very diligent in following these rules:
			
 
				+ *  1). Start with top-level page (PGD or in Xen language: L4). Fill out
			
 
				+ *      the entries.
			
 
				+ *  2). Keep on going, filling out the upper (PUD or L3), and middle (PMD
			
 
				+ *      or L2).
			
 
				+ *  3). Start filling out the PTE table (L1) with the PTE entries. Once
			
 
				+ *  	done, make sure to set each of those entries to RO (so writeable bit
			
 
				+ *  	is unset). Once that has been completed, set the PMD (L2) for this
			
 
				+ *  	PTE table as RO.
			
 
				+ *  4). When completed with all of the PMD (L2) entries, and all of them have
			
 
				+ *  	been set to RO, make sure to set RO the PUD (L3). Do the same
			
 
				+ *  	operation on PGD (L4) pagetable entries that have a PUD (L3) entry.
			
 
				+ *  5). Now before you can use those pages (so setting the cr3), you MUST also
			
 
				+ *      pin them so that the hypervisor can verify the entries. This is done
			
 
				+ *      via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame
			
 
				+ *      number of the PGD (L4)). At this point the HYPERVISOR_mmuext_op(
			
 
				+ *      MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be
			
 
				+ *      issued.
			
 
				+ * For 32-bit guests, the L4 is not used (as there are fewer pagetables), so
			
 
				+ * instead use L3.
			
 
				+ * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE
			
 
				+ * hypercall. Also if so desired the OS can also try to write to the PTE
			
 
				+ * and be trapped by the hypervisor (as the PTE entry is RO).
			
 
				+ *
			
 
				+ * To deallocate the pages, the operations are the reverse of the steps
			
 
				+ * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the
			
 
				+ * pagetable MUST not be in use (meaning that the cr3 is not set to it).
			
 
				+ *
			
 
				+ * ptr[1:0] == MMU_MACHPHYS_UPDATE:
			
 
				+ * Updates an entry in the machine->pseudo-physical mapping table.
			
 
				+ * ptr[:2]  -- Machine address within the frame whose mapping to modify.
			
 
				+ *             The frame must belong to the FD, if one is specified.
			
 
				+ * val      -- Value to write into the mapping entry.
			
 
				+ *
			
 
				+ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
			
 
				+ * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
			
 
				+ * with those in @val.
			
 
				+ *
			
 
				+ * ptr[1:0] == MMU_PT_UPDATE_NO_TRANSLATE:
			
 
				+ * As MMU_NORMAL_PT_UPDATE above, but @val is not translated through FD
			
 
				+ * page tables.
			
 
				+ *
			
 
				+ * @val is usually the machine frame number along with some attributes.
			
 
				+ * The attributes by default follow the architecture defined bits. Meaning that
			
 
				+ * if this is a X86_64 machine and four page table layout is used, the layout
			
 
				+ * of val is:
			
 
				+ *  - 63 if set means No execute (NX)
			
 
				+ *  - 46-13 the machine frame number
			
 
				+ *  - 12 available for guest
			
 
				+ *  - 11 available for guest
			
 
				+ *  - 10 available for guest
			
 
				+ *  - 9 available for guest
			
 
				+ *  - 8 global
			
 
				+ *  - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages)
			
 
				+ *  - 6 dirty
			
 
				+ *  - 5 accessed
			
 
				+ *  - 4 page cached disabled
			
 
				+ *  - 3 page write through
			
 
				+ *  - 2 userspace accessible
			
 
				+ *  - 1 writeable
			
 
				+ *  - 0 present
			
 
				+ *
			
 
				+ *  The one bit that does not fit with the default layout is the PAGE_PSE
			
 
				+ *  (also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the
			
 
				+ *  HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB
			
 
				+ *  (or 2MB) instead of using the PAGE_PSE bit.
			
 
				+ *
			
 
				+ *  The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen
			
 
				+ *  using it as the Page Attribute Table (PAT) bit - for details on it please
			
 
				+ *  refer to Intel SDM 10.12. The PAT allows to set the caching attributes of
			
 
				+ *  pages instead of using MTRRs.
			
 
				+ *
			
 
				+ *  The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits):
			
 
				+ *                    PAT4                 PAT0
			
 
				+ *  +-----+-----+----+----+----+-----+----+----+
			
 
				+ *  | UC  | UC- | WC | WB | UC | UC- | WC | WB |  <= Linux
			
 
				+ *  +-----+-----+----+----+----+-----+----+----+
			
 
				+ *  | UC  | UC- | WT | WB | UC | UC- | WT | WB |  <= BIOS (default when machine boots)
			
 
				+ *  +-----+-----+----+----+----+-----+----+----+
			
 
				+ *  | rsv | rsv | WP | WC | UC | UC- | WT | WB |  <= Xen
			
 
				+ *  +-----+-----+----+----+----+-----+----+----+
			
 
				+ *
			
 
				+ *  The lookup of this index table translates to looking up
			
 
				+ *  Bit 7, Bit 4, and Bit 3 of val entry:
			
 
				+ *
			
 
				+ *  PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
			
 
				+ *
			
 
				+ *  If all bits are off, then we are using PAT0. If bit 3 is turned on,
			
 
				+ *  then we are using PAT1, if bit 3 and bit 4, then PAT2..
			
 
				+ *
			
 
				+ *  As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means
			
 
				+ *  that if a guest that follows Linux's PAT setup and would like to set Write
			
 
				+ *  Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is
			
 
				+ *  set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the
			
 
				+ *  caching as:
			
 
				+ *
			
 
				+ *   WB = none (so PAT0)
			
 
				+ *   WC = PWT (bit 3 on)
			
 
				+ *   UC = PWT | PCD (bit 3 and 4 are on).
			
 
				+ *
			
 
				+ * To make it work with Xen, it needs to translate the WC bit as so:
			
 
				+ *
			
 
				+ *  PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
			
 
				+ *
			
 
				+ * And to translate back it would:
			
 
				+ *
			
 
				+ * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
			
 
				+ */
			
 
				+#define MMU_NORMAL_PT_UPDATE       0 /* checked '*ptr = val'. ptr is MA.      */
			
 
				+#define MMU_MACHPHYS_UPDATE        1 /* ptr = MA of frame to modify entry for */
			
 
				+#define MMU_PT_UPDATE_PRESERVE_AD  2 /* atomically: *ptr = val | (*ptr&(A|D)) */
			
 
				+#define MMU_PT_UPDATE_NO_TRANSLATE 3 /* checked '*ptr = val'. ptr is MA.      */
			
 
				+                                     /* val never translated.                 */
			
 
				+
			
 
				+/*
			
 
				+ * MMU EXTENDED OPERATIONS
			
 
				+ *
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_mmuext_op(mmuext_op_t uops[],
			
 
				+ * `                      unsigned int count,
			
 
				+ * `                      unsigned int *pdone,
			
 
				+ * `                      unsigned int foreigndom)
			
 
				+ */
			
 
				+/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
			
 
				+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
			
 
				+ * Where the FD has some effect, it is described below.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_(UN)PIN_*_TABLE
			
 
				+ * mfn: Machine frame number to be (un)pinned as a p.t. page.
			
 
				+ *      The frame must belong to the FD, if one is specified.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_NEW_BASEPTR
			
 
				+ * mfn: Machine frame number of new page-table base to install in MMU.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
			
 
				+ * mfn: Machine frame number of new page-table base to install in MMU
			
 
				+ *      when in user space.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_TLB_FLUSH_LOCAL
			
 
				+ * No additional arguments. Flushes local TLB.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_INVLPG_LOCAL
			
 
				+ * linear_addr: Linear address to be flushed from the local TLB.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_TLB_FLUSH_MULTI
			
 
				+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_INVLPG_MULTI
			
 
				+ * linear_addr: Linear address to be flushed.
			
 
				+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_TLB_FLUSH_ALL
			
 
				+ * No additional arguments. Flushes all VCPUs' TLBs.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_INVLPG_ALL
			
 
				+ * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_FLUSH_CACHE
			
 
				+ * No additional arguments. Writes back and flushes cache contents.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_FLUSH_CACHE_GLOBAL
			
 
				+ * No additional arguments. Writes back and flushes cache contents
			
 
				+ * on all CPUs in the system.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_SET_LDT
			
 
				+ * linear_addr: Linear address of LDT base (NB. must be page-aligned).
			
 
				+ * nr_ents: Number of entries in LDT.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_CLEAR_PAGE
			
 
				+ * mfn: Machine frame number to be cleared.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_COPY_PAGE
			
 
				+ * mfn: Machine frame number of the destination page.
			
 
				+ * src_mfn: Machine frame number of the source page.
			
 
				+ *
			
 
				+ * cmd: MMUEXT_[UN]MARK_SUPER
			
 
				+ * mfn: Machine frame number of head of superpage to be [un]marked.
			
 
				+ */
			
 
				+/* ` enum mmuext_cmd { */
			
 
				+#define MMUEXT_PIN_L1_TABLE      0
			
 
				+#define MMUEXT_PIN_L2_TABLE      1
			
 
				+#define MMUEXT_PIN_L3_TABLE      2
			
 
				+#define MMUEXT_PIN_L4_TABLE      3
			
 
				+#define MMUEXT_UNPIN_TABLE       4
			
 
				+#define MMUEXT_NEW_BASEPTR       5
			
 
				+#define MMUEXT_TLB_FLUSH_LOCAL   6
			
 
				+#define MMUEXT_INVLPG_LOCAL      7
			
 
				+#define MMUEXT_TLB_FLUSH_MULTI   8
			
 
				+#define MMUEXT_INVLPG_MULTI      9
			
 
				+#define MMUEXT_TLB_FLUSH_ALL    10
			
 
				+#define MMUEXT_INVLPG_ALL       11
			
 
				+#define MMUEXT_FLUSH_CACHE      12
			
 
				+#define MMUEXT_SET_LDT          13
			
 
				+#define MMUEXT_NEW_USER_BASEPTR 15
			
 
				+#define MMUEXT_CLEAR_PAGE       16
			
 
				+#define MMUEXT_COPY_PAGE        17
			
 
				+#define MMUEXT_FLUSH_CACHE_GLOBAL 18
			
 
				+#define MMUEXT_MARK_SUPER       19
			
 
				+#define MMUEXT_UNMARK_SUPER     20
			
 
				+/* ` } */
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+struct mmuext_op {
			
 
				+    unsigned int cmd; /* => enum mmuext_cmd */
			
 
				+    union {
			
 
				+        /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
			
 
				+         * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
			
 
				+        xen_pfn_t     mfn;
			
 
				+        /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
			
 
				+        unsigned long linear_addr;
			
 
				+    } arg1;
			
 
				+    union {
			
 
				+        /* SET_LDT */
			
 
				+        unsigned int nr_ents;
			
 
				+        /* TLB_FLUSH_MULTI, INVLPG_MULTI */
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
			
 
				+        XEN_GUEST_HANDLE(const_void) vcpumask;
			
 
				+#else
			
 
				+        const void *vcpumask;
			
 
				+#endif
			
 
				+        /* COPY_PAGE */
			
 
				+        xen_pfn_t src_mfn;
			
 
				+    } arg2;
			
 
				+};
			
 
				+typedef struct mmuext_op mmuext_op_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_update_va_mapping(unsigned long va, u64 val,
			
 
				+ * `                              enum uvm_flags flags)
			
 
				+ * `
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, u64 val,
			
 
				+ * `                                          enum uvm_flags flags,
			
 
				+ * `                                          domid_t domid)
			
 
				+ * `
			
 
				+ * ` @va: The virtual address whose mapping we want to change
			
 
				+ * ` @val: The new page table entry, must contain a machine address
			
 
				+ * ` @flags: Control TLB flushes
			
 
				+ */
			
 
				+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
			
 
				+/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap.   */
			
 
				+/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer.         */
			
 
				+/* ` enum uvm_flags { */
			
 
				+#define UVMF_NONE           (xen_mk_ulong(0)<<0) /* No flushing at all.   */
			
 
				+#define UVMF_TLB_FLUSH      (xen_mk_ulong(1)<<0) /* Flush entire TLB(s).  */
			
 
				+#define UVMF_INVLPG         (xen_mk_ulong(2)<<0) /* Flush only one entry. */
			
 
				+#define UVMF_FLUSHTYPE_MASK (xen_mk_ulong(3)<<0)
			
 
				+#define UVMF_MULTI          (xen_mk_ulong(0)<<2) /* Flush subset of TLBs. */
			
 
				+#define UVMF_LOCAL          (xen_mk_ulong(0)<<2) /* Flush local TLB.      */
			
 
				+#define UVMF_ALL            (xen_mk_ulong(1)<<2) /* Flush all TLBs.       */
			
 
				+/* ` } */
			
 
				+
			
 
				+/*
			
 
				+ * ` int
			
 
				+ * ` HYPERVISOR_console_io(unsigned int cmd,
			
 
				+ * `                       unsigned int count,
			
 
				+ * `                       char buffer[]);
			
 
				+ *
			
 
				+ * @cmd: Command (see below)
			
 
				+ * @count: Size of the buffer to read/write
			
 
				+ * @buffer: Pointer in the guest memory
			
 
				+ *
			
 
				+ * List of commands:
			
 
				+ *
			
 
				+ *  * CONSOLEIO_write: Write the buffer to Xen console.
			
 
				+ *      For the hardware domain, all the characters in the buffer will
			
 
				+ *      be written. Characters will be printed directly to the console.
			
 
				+ *      For all the other domains, only the printable characters will be
			
 
				+ *      written. Characters may be buffered until a newline (i.e. '\n') is
			
 
				+ *      found.
			
 
				+ *      @return 0 on success, otherwise return an error code.
			
 
				+ *  * CONSOLEIO_read: Attempts to read up to @count characters from Xen
			
 
				+ *      console. The maximum buffer size (i.e. @count) supported is 2GB.
			
 
				+ *      @return the number of characters read on success, otherwise return
			
 
				+ *      an error code.
			
 
				+ */
			
 
				+#define CONSOLEIO_write         0
			
 
				+#define CONSOLEIO_read          1
			
 
				+
			
 
				+/*
			
 
				+ * Commands to HYPERVISOR_vm_assist().
			
 
				+ */
			
 
				+#define VMASST_CMD_enable                0
			
 
				+#define VMASST_CMD_disable               1
			
 
				+
			
 
				+/* x86/32 guests: simulate full 4GB segment limits. */
			
 
				+#define VMASST_TYPE_4gb_segments         0
			
 
				+
			
 
				+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
			
 
				+#define VMASST_TYPE_4gb_segments_notify  1
			
 
				+
			
 
				+/*
			
 
				+ * x86 guests: support writes to bottom-level PTEs.
			
 
				+ * NB1. Page-directory entries cannot be written.
			
 
				+ * NB2. Guest must continue to remove all writable mappings of PTEs.
			
 
				+ */
			
 
				+#define VMASST_TYPE_writable_pagetables  2
			
 
				+
			
 
				+/* x86/PAE guests: support PDPTs above 4GB. */
			
 
				+#define VMASST_TYPE_pae_extended_cr3     3
			
 
				+
			
 
				+/*
			
 
				+ * x86 guests: Sane behaviour for virtual iopl
			
 
				+ *  - virtual iopl updated from do_iret() hypercalls.
			
 
				+ *  - virtual iopl reported in bounce frames.
			
 
				+ *  - guest kernels assumed to be level 0 for the purpose of iopl checks.
			
 
				+ */
			
 
				+#define VMASST_TYPE_architectural_iopl   4
			
 
				+
			
 
				+/*
			
 
				+ * All guests: activate update indicator in vcpu_runstate_info
			
 
				+ * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped
			
 
				+ * vcpu_runstate_info during updates of the runstate information.
			
 
				+ */
			
 
				+#define VMASST_TYPE_runstate_update_flag 5
			
 
				+
			
 
				+/*
			
 
				+ * x86/64 guests: strictly hide M2P from user mode.
			
 
				+ * This allows the guest to control respective hypervisor behavior:
			
 
				+ * - when not set, L4 tables get created with the respective slot blank,
			
 
				+ *   and whenever the L4 table gets used as a kernel one the missing
			
 
				+ *   mapping gets inserted,
			
 
				+ * - when set, L4 tables get created with the respective slot initialized
			
 
				+ *   as before, and whenever the L4 table gets used as a user one the
			
 
				+ *   mapping gets zapped.
			
 
				+ */
			
 
				+#define VMASST_TYPE_m2p_strict           32
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040600
			
 
				+#define MAX_VMASST_TYPE                  3
			
 
				+#endif
			
 
				+
			
 
				+/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
			
 
				+#define DOMID_FIRST_RESERVED xen_mk_uint(0x7FF0)
			
 
				+
			
 
				+/* DOMID_SELF is used in certain contexts to refer to oneself. */
			
 
				+#define DOMID_SELF           xen_mk_uint(0x7FF0)
			
 
				+
			
 
				+/*
			
 
				+ * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
			
 
				+ * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
			
 
				+ * is useful to ensure that no mappings to the OS's own heap are accidentally
			
 
				+ * installed. (e.g., in Linux this could cause havoc as reference counts
			
 
				+ * aren't adjusted on the I/O-mapping code path).
			
 
				+ * This only makes sense as HYPERVISOR_mmu_update()'s and
			
 
				+ * HYPERVISOR_update_va_mapping_otherdomain()'s "foreigndom" argument. For
			
 
				+ * HYPERVISOR_mmu_update() context it can be specified by any calling domain,
			
 
				+ * otherwise it's only permitted if the caller is privileged.
			
 
				+ */
			
 
				+#define DOMID_IO             xen_mk_uint(0x7FF1)
			
 
				+
			
 
				+/*
			
 
				+ * DOMID_XEN is used to allow privileged domains to map restricted parts of
			
 
				+ * Xen's heap space (e.g., the machine_to_phys table).
			
 
				+ * This only makes sense as
			
 
				+ * - HYPERVISOR_mmu_update()'s, HYPERVISOR_mmuext_op()'s, or
			
 
				+ *   HYPERVISOR_update_va_mapping_otherdomain()'s "foreigndom" argument,
			
 
				+ * - with XENMAPSPACE_gmfn_foreign,
			
 
				+ * and is only permitted if the caller is privileged.
			
 
				+ */
			
 
				+#define DOMID_XEN            xen_mk_uint(0x7FF2)
			
 
				+
			
 
				+/*
			
 
				+ * DOMID_COW is used as the owner of sharable pages */
			
 
				+#define DOMID_COW            xen_mk_uint(0x7FF3)
			
 
				+
			
 
				+/* DOMID_INVALID is used to identify pages with unknown owner. */
			
 
				+#define DOMID_INVALID        xen_mk_uint(0x7FF4)
			
 
				+
			
 
				+/* Idle domain. */
			
 
				+#define DOMID_IDLE           xen_mk_uint(0x7FFF)
			
 
				+
			
 
				+/* Mask for valid domain id values */
			
 
				+#define DOMID_MASK           xen_mk_uint(0x7FFF)
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+
			
 
				+typedef uint16_t domid_t;
			
 
				+
			
 
				+/*
			
 
				+ * Send an array of these to HYPERVISOR_mmu_update().
			
 
				+ * NB. The fields are natural pointer/address size for this architecture.
			
 
				+ */
			
 
				+struct mmu_update {
			
 
				+    uint64_t ptr;       /* Machine address of PTE. */
			
 
				+    uint64_t val;       /* New contents of PTE.    */
			
 
				+};
			
 
				+typedef struct mmu_update mmu_update_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
			
 
				+
			
 
				+/*
			
 
				+ * ` enum neg_errnoval
			
 
				+ * ` HYPERVISOR_multicall(multicall_entry_t call_list[],
			
 
				+ * `                      uint32_t nr_calls);
			
 
				+ *
			
 
				+ * NB. The fields are logically the natural register size for this
			
 
				+ * architecture. In cases where xen_ulong_t is larger than this then
			
 
				+ * any unused bits in the upper portion must be zero.
			
 
				+ */
			
 
				+struct multicall_entry {
			
 
				+    xen_ulong_t op, result;
			
 
				+    xen_ulong_t args[6];
			
 
				+};
			
 
				+typedef struct multicall_entry multicall_entry_t;
			
 
				+DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
			
 
				+
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00040400
			
 
				+/*
			
 
				+ * Event channel endpoints per domain (when using the 2-level ABI):
			
 
				+ *  1024 if a long is 32 bits; 4096 if a long is 64 bits.
			
 
				+ */
			
 
				+#define NR_EVENT_CHANNELS EVTCHN_2L_NR_CHANNELS
			
 
				+#endif
			
 
				+
			
 
				+struct vcpu_time_info {
			
 
				+    /*
			
 
				+     * Updates to the following values are preceded and followed by an
			
 
				+     * increment of 'version'. The guest can therefore detect updates by
			
 
				+     * looking for changes to 'version'. If the least-significant bit of
			
 
				+     * the version number is set then an update is in progress and the guest
			
 
				+     * must wait to read a consistent set of values.
			
 
				+     * The correct way to interact with the version number is similar to
			
 
				+     * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry.
			
 
				+     */
			
 
				+    uint32_t version;
			
 
				+    uint32_t pad0;
			
 
				+    uint64_t tsc_timestamp;   /* TSC at last update of time vals.  */
			
 
				+    uint64_t system_time;     /* Time, in nanosecs, since boot.    */
			
 
				+    /*
			
 
				+     * Current system time:
			
 
				+     *   system_time +
			
 
				+     *   ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32)
			
 
				+     * CPU frequency (Hz):
			
 
				+     *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
			
 
				+     */
			
 
				+    uint32_t tsc_to_system_mul;
			
 
				+    int8_t   tsc_shift;
			
 
				+#if __XEN_INTERFACE_VERSION__ > 0x040600
			
 
				+    uint8_t  flags;
			
 
				+    uint8_t  pad1[2];
			
 
				+#else
			
 
				+    int8_t   pad1[3];
			
 
				+#endif
			
 
				+}; /* 32 bytes */
			
 
				+typedef struct vcpu_time_info vcpu_time_info_t;
			
 
				+
			
 
				+#define XEN_PVCLOCK_TSC_STABLE_BIT     (1 << 0)
			
 
				+#define XEN_PVCLOCK_GUEST_STOPPED      (1 << 1)
			
 
				+
			
 
				+struct vcpu_info {
			
 
				+    /*
			
 
				+     * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
			
 
				+     * a pending notification for a particular VCPU. It is then cleared
			
 
				+     * by the guest OS /before/ checking for pending work, thus avoiding
			
 
				+     * a set-and-check race. Note that the mask is only accessed by Xen
			
 
				+     * on the CPU that is currently hosting the VCPU. This means that the
			
 
				+     * pending and mask flags can be updated by the guest without special
			
 
				+     * synchronisation (i.e., no need for the x86 LOCK prefix).
			
 
				+     * This may seem suboptimal because if the pending flag is set by
			
 
				+     * a different CPU then an IPI may be scheduled even when the mask
			
 
				+     * is set. However, note:
			
 
				+     *  1. The task of 'interrupt holdoff' is covered by the per-event-
			
 
				+     *     channel mask bits. A 'noisy' event that is continually being
			
 
				+     *     triggered can be masked at source at this very precise
			
 
				+     *     granularity.
			
 
				+     *  2. The main purpose of the per-VCPU mask is therefore to restrict
			
 
				+     *     reentrant execution: whether for concurrency control, or to
			
 
				+     *     prevent unbounded stack usage. Whatever the purpose, we expect
			
 
				+     *     that the mask will be asserted only for short periods at a time,
			
 
				+     *     and so the likelihood of a 'spurious' IPI is suitably small.
			
 
				+     * The mask is read before making an event upcall to the guest: a
			
 
				+     * non-zero mask therefore guarantees that the VCPU will not receive
			
 
				+     * an upcall activation. The mask is cleared when the VCPU requests
			
 
				+     * to block: this avoids wakeup-waiting races.
			
 
				+     */
			
 
				+    uint8_t evtchn_upcall_pending;
			
 
				+#ifdef XEN_HAVE_PV_UPCALL_MASK
			
 
				+    uint8_t evtchn_upcall_mask;
			
 
				+#else /* XEN_HAVE_PV_UPCALL_MASK */
			
 
				+    uint8_t pad0;
			
 
				+#endif /* XEN_HAVE_PV_UPCALL_MASK */
			
 
				+    xen_ulong_t evtchn_pending_sel;
			
 
				+    struct arch_vcpu_info arch;
			
 
				+    vcpu_time_info_t time;
			
 
				+}; /* 64 bytes (x86) */
			
 
				+#ifndef __XEN__
			
 
				+typedef struct vcpu_info vcpu_info_t;
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 200 startofday_shared Start-of-day shared data structure
			
 
				+ * Xen/kernel shared data -- pointer provided in start_info.
			
 
				+ *
			
 
				+ * This structure is defined to be both smaller than a page, and the
			
 
				+ * only data on the shared page, but may vary in actual size even within
			
 
				+ * compatible Xen versions; guests should not rely on the size
			
 
				+ * of this structure remaining constant.
			
 
				+ */
			
 
				+struct shared_info {
			
 
				+    struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
			
 
				+
			
 
				+    /*
			
 
				+     * A domain can create "event channels" on which it can send and receive
			
 
				+     * asynchronous event notifications. There are three classes of event that
			
 
				+     * are delivered by this mechanism:
			
 
				+     *  1. Bi-directional inter- and intra-domain connections. Domains must
			
 
				+     *     arrange out-of-band to set up a connection (usually by allocating
			
 
				+     *     an unbound 'listener' port and advertising that via a storage service
			
 
				+     *     such as xenstore).
			
 
				+     *  2. Physical interrupts. A domain with suitable hardware-access
			
 
				+     *     privileges can bind an event-channel port to a physical interrupt
			
 
				+     *     source.
			
 
				+     *  3. Virtual interrupts ('events'). A domain can bind an event-channel
			
 
				+     *     port to a virtual interrupt source, such as the virtual-timer
			
 
				+     *     device or the emergency console.
			
 
				+     *
			
 
				+     * Event channels are addressed by a "port index". Each channel is
			
 
				+     * associated with two bits of information:
			
 
				+     *  1. PENDING -- notifies the domain that there is a pending notification
			
 
				+     *     to be processed. This bit is cleared by the guest.
			
 
				+     *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
			
 
				+     *     will cause an asynchronous upcall to be scheduled. This bit is only
			
 
				+     *     updated by the guest. It is read-only within Xen. If a channel
			
 
				+     *     becomes pending while the channel is masked then the 'edge' is lost
			
 
				+     *     (i.e., when the channel is unmasked, the guest must manually handle
			
 
				+     *     pending notifications as no upcall will be scheduled by Xen).
			
 
				+     *
			
 
				+     * To expedite scanning of pending notifications, any 0->1 pending
			
 
				+     * transition on an unmasked channel causes a corresponding bit in a
			
 
				+     * per-vcpu selector word to be set. Each bit in the selector covers a
			
 
				+     * 'C long' in the PENDING bitfield array.
			
 
				+     */
			
 
				+    xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
			
 
				+    xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
			
 
				+
			
 
				+    /*
			
 
				+     * Wallclock time: updated by control software or RTC emulation.
			
 
				+     * Guests should base their gettimeofday() syscall on this
			
 
				+     * wallclock-base value.
			
 
				+     * The values of wc_sec and wc_nsec are offsets from the Unix epoch
			
 
				+     * adjusted by the domain's 'time offset' (in seconds) as set either
			
 
				+     * by XEN_DOMCTL_settimeoffset, or adjusted via a guest write to the
			
 
				+     * emulated RTC.
			
 
				+     */
			
 
				+    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
			
 
				+    uint32_t wc_sec;
			
 
				+    uint32_t wc_nsec;
			
 
				+#if !defined(__i386__)
			
 
				+    uint32_t wc_sec_hi;
			
 
				+# define xen_wc_sec_hi wc_sec_hi
			
 
				+#elif !defined(__XEN__) && !defined(__XEN_TOOLS__)
			
 
				+# define xen_wc_sec_hi arch.wc_sec_hi
			
 
				+#endif
			
 
				+
			
 
				+    struct arch_shared_info arch;
			
 
				+
			
 
				+};
			
 
				+#ifndef __XEN__
			
 
				+typedef struct shared_info shared_info_t;
			
 
				+#endif
			
 
				+
			
 
				+/*
			
 
				+ * `incontents 200 startofday Start-of-day memory layout
			
 
				+ *
			
 
				+ *  1. The domain is started within contiguous virtual-memory region.
			
 
				+ *  2. The contiguous region ends on an aligned 4MB boundary.
			
 
				+ *  3. This is the order of bootstrap elements in the initial virtual region:
			
 
				+ *      a. relocated kernel image
			
 
				+ *      b. initial ram disk              [mod_start, mod_len]
			
 
				+ *         (may be omitted)
			
 
				+ *      c. list of allocated page frames [mfn_list, nr_pages]
			
 
				+ *         (unless relocated due to XEN_ELFNOTE_INIT_P2M)
			
 
				+ *      d. start_info_t structure        [register rSI (x86)]
			
 
				+ *         in case of dom0 this page contains the console info, too
			
 
				+ *      e. unless dom0: xenstore ring page
			
 
				+ *      f. unless dom0: console ring page
			
 
				+ *      g. bootstrap page tables         [pt_base and CR3 (x86)]
			
 
				+ *      h. bootstrap stack               [register ESP (x86)]
			
 
				+ *  4. Bootstrap elements are packed together, but each is 4kB-aligned.
			
 
				+ *  5. The list of page frames forms a contiguous 'pseudo-physical' memory
			
 
				+ *     layout for the domain. In particular, the bootstrap virtual-memory
			
 
				+ *     region is a 1:1 mapping to the first section of the pseudo-physical map.
			
 
				+ *  6. All bootstrap elements are mapped read-writable for the guest OS. The
			
 
				+ *     only exception is the bootstrap page table, which is mapped read-only.
			
 
				+ *  7. There is guaranteed to be at least 512kB padding after the final
			
 
				+ *     bootstrap element. If necessary, the bootstrap virtual region is
			
 
				+ *     extended by an extra 4MB to ensure this.
			
 
				+ *
			
 
				+ * Note: Prior to 25833:bb85bbccb1c9. ("x86/32-on-64 adjust Dom0 initial page
			
 
				+ * table layout") a bug caused the pt_base (3.g above) and cr3 to not point
			
 
				+ * to the start of the guest page tables (it was offset by two pages).
			
 
				+ * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU
			
 
				+ * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got
			
 
				+ * allocated in the order: 'first L1','first L2', 'first L3', so the offset
			
 
				+ * to the page table base is by two pages back. The initial domain if it is
			
 
				+ * 32-bit and runs under a 64-bit hypervisor should _NOT_ use two of the
			
 
				+ * pages preceding pt_base and mark them as reserved/unused.
			
 
				+ */
			
 
				+#ifdef XEN_HAVE_PV_GUEST_ENTRY
			
 
				+struct start_info {
			
 
				+    /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
			
 
				+    char magic[32];             /* "xen-<version>-<platform>".            */
			
 
				+    unsigned long nr_pages;     /* Total pages allocated to this domain.  */
			
 
				+    unsigned long shared_info;  /* MACHINE address of shared info struct. */
			
 
				+    uint32_t flags;             /* SIF_xxx flags.                         */
			
 
				+    xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
			
 
				+    uint32_t store_evtchn;      /* Event channel for store communication. */
			
 
				+    union {
			
 
				+        struct {
			
 
				+            xen_pfn_t mfn;      /* MACHINE page number of console page.   */
			
 
				+            uint32_t  evtchn;   /* Event channel for console page.        */
			
 
				+        } domU;
			
 
				+        struct {
			
 
				+            uint32_t info_off;  /* Offset of console_info struct.         */
			
 
				+            uint32_t info_size; /* Size of console_info struct from start.*/
			
 
				+        } dom0;
			
 
				+    } console;
			
 
				+    /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
			
 
				+    unsigned long pt_base;      /* VIRTUAL address of page directory.     */
			
 
				+    unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames.       */
			
 
				+    unsigned long mfn_list;     /* VIRTUAL address of page-frame list.    */
			
 
				+    unsigned long mod_start;    /* VIRTUAL address of pre-loaded module   */
			
 
				+                                /* (PFN of pre-loaded module if           */
			
 
				+                                /*  SIF_MOD_START_PFN set in flags).      */
			
 
				+    unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
			
 
				+#define MAX_GUEST_CMDLINE 1024
			
 
				+    int8_t cmd_line[MAX_GUEST_CMDLINE];
			
 
				+    /* The pfn range here covers both page table and p->m table frames.   */
			
 
				+    unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table.    */
			
 
				+    unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table.  */
			
 
				+};
			
 
				+typedef struct start_info start_info_t;
			
 
				+
			
 
				+/* New console union for dom0 introduced in 0x00030203. */
			
 
				+#if __XEN_INTERFACE_VERSION__ < 0x00030203
			
 
				+#define console_mfn    console.domU.mfn
			
 
				+#define console_evtchn console.domU.evtchn
			
 
				+#endif
			
 
				+#endif /* XEN_HAVE_PV_GUEST_ENTRY */
			
 
				+
			
 
				+/* These flags are passed in the 'flags' field of start_info_t. */
			
 
				+#define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
			
 
				+#define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
			
 
				+#define SIF_MULTIBOOT_MOD (1<<2)  /* Is mod_start a multiboot module? */
			
 
				+#define SIF_MOD_START_PFN (1<<3)  /* Is mod_start a PFN? */
			
 
				+#define SIF_VIRT_P2M_4TOOLS (1<<4) /* Do Xen tools understand a virt. mapped */
			
 
				+                                   /* P->M making the 3 level tree obsolete? */
			
 
				+#define SIF_PM_MASK       (0xFF<<8) /* reserve 1 byte for xen-pm options */
			
 
				+
			
 
				+/*
			
 
				+ * A multiboot module is a package containing modules very similar to a
			
 
				+ * multiboot module array. The only differences are:
			
 
				+ * - the array of module descriptors is by convention simply at the beginning
			
 
				+ *   of the multiboot module,
			
 
				+ * - addresses in the module descriptors are based on the beginning of the
			
 
				+ *   multiboot module,
			
 
				+ * - the number of modules is determined by a termination descriptor that has
			
 
				+ *   mod_start == 0.
			
 
				+ *
			
 
				+ * This permits to both build it statically and reference it in a configuration
			
 
				+ * file, and let the PV guest easily rebase the addresses to virtual addresses
			
 
				+ * and at the same time count the number of modules.
			
 
				+ */
			
 
				+struct xen_multiboot_mod_list
			
 
				+{
			
 
				+    /* Address of first byte of the module */
			
 
				+    uint32_t mod_start;
			
 
				+    /* Address of last byte of the module (inclusive) */
			
 
				+    uint32_t mod_end;
			
 
				+    /* Address of zero-terminated command line */
			
 
				+    uint32_t cmdline;
			
 
				+    /* Unused, must be zero */
			
 
				+    uint32_t pad;
			
 
				+};
			
 
				+/*
			
 
				+ * `incontents 200 startofday_dom0_console Dom0_console
			
 
				+ *
			
 
				+ * The console structure in start_info.console.dom0
			
 
				+ *
			
 
				+ * This structure includes a variety of information required to
			
 
				+ * have a working VGA/VESA console.
			
 
				+ */
			
 
				+typedef struct dom0_vga_console_info {
			
 
				+    uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
			
 
				+#define XEN_VGATYPE_TEXT_MODE_3 0x03
			
 
				+#define XEN_VGATYPE_VESA_LFB    0x23
			
 
				+#define XEN_VGATYPE_EFI_LFB     0x70
			
 
				+
			
 
				+    union {
			
 
				+        struct {
			
 
				+            /* Font height, in pixels. */
			
 
				+            uint16_t font_height;
			
 
				+            /* Cursor location (column, row). */
			
 
				+            uint16_t cursor_x, cursor_y;
			
 
				+            /* Number of rows and columns (dimensions in characters). */
			
 
				+            uint16_t rows, columns;
			
 
				+        } text_mode_3;
			
 
				+
			
 
				+        struct {
			
 
				+            /* Width and height, in pixels. */
			
 
				+            uint16_t width, height;
			
 
				+            /* Bytes per scan line. */
			
 
				+            uint16_t bytes_per_line;
			
 
				+            /* Bits per pixel. */
			
 
				+            uint16_t bits_per_pixel;
			
 
				+            /* LFB physical address, and size (in units of 64kB). */
			
 
				+            uint32_t lfb_base;
			
 
				+            uint32_t lfb_size;
			
 
				+            /* RGB mask offsets and sizes, as defined by VBE 1.2+ */
			
 
				+            uint8_t  red_pos, red_size;
			
 
				+            uint8_t  green_pos, green_size;
			
 
				+            uint8_t  blue_pos, blue_size;
			
 
				+            uint8_t  rsvd_pos, rsvd_size;
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x00030206
			
 
				+            /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
			
 
				+            uint32_t gbl_caps;
			
 
				+            /* Mode attributes (offset 0x0, VESA command 0x4f01). */
			
 
				+            uint16_t mode_attrs;
			
 
				+            uint16_t pad;
			
 
				+#endif
			
 
				+#if __XEN_INTERFACE_VERSION__ >= 0x00040d00
			
 
				+            /* high 32 bits of lfb_base */
			
 
				+            uint32_t ext_lfb_base;
			
 
				+#endif
			
 
				+        } vesa_lfb;
			
 
				+    } u;
			
 
				+} dom0_vga_console_info_t;
			
 
				+#define xen_vga_console_info dom0_vga_console_info
			
 
				+#define xen_vga_console_info_t dom0_vga_console_info_t
			
 
				+
			
 
				+typedef uint8_t xen_domain_handle_t[16];
			
 
				+
			
 
				+__DEFINE_XEN_GUEST_HANDLE(uint8,  uint8_t);
			
 
				+__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t);
			
 
				+__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t);
			
 
				+__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t);
			
 
				+
			
 
				+typedef struct {
			
 
				+    uint8_t a[16];
			
 
				+} xen_uuid_t;
			
 
				+
			
 
				+/*
			
 
				+ * XEN_DEFINE_UUID(0x00112233, 0x4455, 0x6677, 0x8899,
			
 
				+ *                 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff)
			
 
				+ * will construct UUID 00112233-4455-6677-8899-aabbccddeeff presented as
			
 
				+ * {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
			
 
				+ * 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff};
			
 
				+ *
			
 
				+ * NB: This is compatible with Linux kernel and with libuuid, but it is not
			
 
				+ * compatible with Microsoft, as they use mixed-endian encoding (some
			
 
				+ * components are little-endian, some are big-endian).
			
 
				+ */
			
 
				+#define XEN_DEFINE_UUID_(a, b, c, d, e1, e2, e3, e4, e5, e6)            \
			
 
				+    {{((a) >> 24) & 0xFF, ((a) >> 16) & 0xFF,                           \
			
 
				+      ((a) >>  8) & 0xFF, ((a) >>  0) & 0xFF,                           \
			
 
				+      ((b) >>  8) & 0xFF, ((b) >>  0) & 0xFF,                           \
			
 
				+      ((c) >>  8) & 0xFF, ((c) >>  0) & 0xFF,                           \
			
 
				+      ((d) >>  8) & 0xFF, ((d) >>  0) & 0xFF,                           \
			
 
				+                e1, e2, e3, e4, e5, e6}}
			
 
				+
			
 
				+#if defined(__STDC_VERSION__) ? __STDC_VERSION__ >= 199901L : defined(__GNUC__)
			
 
				+#define XEN_DEFINE_UUID(a, b, c, d, e1, e2, e3, e4, e5, e6)             \
			
 
				+    ((xen_uuid_t)XEN_DEFINE_UUID_(a, b, c, d, e1, e2, e3, e4, e5, e6))
			
 
				+#else
			
 
				+#define XEN_DEFINE_UUID(a, b, c, d, e1, e2, e3, e4, e5, e6)             \
			
 
				+    XEN_DEFINE_UUID_(a, b, c, d, e1, e2, e3, e4, e5, e6)
			
 
				+#endif /* __STDC_VERSION__ / __GNUC__ */
			
 
				+
			
 
				+#endif /* !__ASSEMBLY__ */
			
 
				+
			
 
				+/* Default definitions for macros used by domctl/sysctl. */
			
 
				+#if defined(__XEN__) || defined(__XEN_TOOLS__)
			
 
				+
			
 
				+#ifndef int64_aligned_t
			
 
				+#define int64_aligned_t int64_t
			
 
				+#endif
			
 
				+#ifndef uint64_aligned_t
			
 
				+#define uint64_aligned_t uint64_t
			
 
				+#endif
			
 
				+#ifndef XEN_GUEST_HANDLE_64
			
 
				+#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name)
			
 
				+#endif
			
 
				+
			
 
				+#ifndef __ASSEMBLY__
			
 
				+struct xenctl_bitmap {
			
 
				+    XEN_GUEST_HANDLE_64(uint8) bitmap;
			
 
				+    uint32_t nr_bits;
			
 
				+};
			
 
				+typedef struct xenctl_bitmap xenctl_bitmap_t;
			
 
				+#endif
			
 
				+
			
 
				+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
			
 
				+
			
 
				+#endif /* __XEN_PUBLIC_XEN_H__ */
			
 
				+
			
 
				+/*
			
 
				+ * Local variables:
			
 
				+ * mode: C
			
 
				+ * c-file-style: "BSD"
			
 
				+ * c-basic-offset: 4
			
 
				+ * tab-width: 4
			
 
				+ * indent-tabs-mode: nil
			
 
				+ * End:
			
 
				+ */
			
--- a/include/hw/xen/xen-legacy-backend.h
+++ b/include/hw/xen/xen-legacy-backend.h
@@ -42,8 +42,7 @@ int xenstore_read_fe_uint64(struct XenLegacyDevice *xendev, const char *node,
 
				 void xen_be_check_state(struct XenLegacyDevice *xendev);
			
 
				 
			
 
				 /* xen backend driver bits */
			
 
				-int xen_be_init(void);
			
 
				-void xen_be_register_common(void);
			
 
				+void xen_be_init(void);
			
 
				 int xen_be_register(const char *type, struct XenDevOps *ops);
			
 
				 int xen_be_set_state(struct XenLegacyDevice *xendev, enum xenbus_state state);
			
 
				 int xen_be_bind_evtchn(struct XenLegacyDevice *xendev);
			
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -1,19 +1,30 @@
 
				-#ifndef QEMU_HW_XEN_H
			
 
				-#define QEMU_HW_XEN_H
			
 
				-
			
 
				 /*
			
 
				  * public xen header
			
 
				  *   stuff needed outside xen-*.c, i.e. interfaces to qemu.
			
 
				  *   must not depend on any xen headers being present in
			
 
				  *   /usr/include/xen, so it can be included unconditionally.
			
 
				  */
			
 
				+#ifndef QEMU_HW_XEN_H
			
 
				+#define QEMU_HW_XEN_H
			
 
				+
			
 
				+/*
			
 
				+ * As a temporary measure while the headers are being untangled, define
			
 
				+ * __XEN_TOOLS__ here before any Xen headers are included. Otherwise, if
			
 
				+ * the Xen toolstack library headers are later included, they will find
			
 
				+ * some of the "internal" definitions missing and the build will fail. In
			
 
				+ * later commits, we'll end up with a rule that the native libraries have
			
 
				+ * to be included first, which will ensure that the libraries get the
			
 
				+ * version of Xen libraries that they expect.
			
 
				+ */
			
 
				+#define __XEN_TOOLS__ 1
			
 
				 
			
 
				 #include "exec/cpu-common.h"
			
 
				 
			
 
				 /* xen-machine.c */
			
 
				 enum xen_mode {
			
 
				-    XEN_EMULATE = 0,  // xen emulation, using xenner (default)
			
 
				-    XEN_ATTACH        // attach to xen domain created by libxl
			
 
				+    XEN_DISABLED = 0, /* xen support disabled (default) */
			
 
				+    XEN_ATTACH,       /* attach to xen domain created by libxl */
			
 
				+    XEN_EMULATE,      /* emulate Xen within QEMU */
			
 
				 };
			
 
				 
			
 
				 extern uint32_t xen_domid;
			
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -114,6 +114,8 @@ void hmp_virtio_status(Monitor *mon, const QDict *qdict);
 
				 void hmp_virtio_queue_status(Monitor *mon, const QDict *qdict);
			
 
				 void hmp_vhost_queue_status(Monitor *mon, const QDict *qdict);
			
 
				 void hmp_virtio_queue_element(Monitor *mon, const QDict *qdict);
			
 
				+void hmp_xen_event_inject(Monitor *mon, const QDict *qdict);
			
 
				+void hmp_xen_event_list(Monitor *mon, const QDict *qdict);
			
 
				 void object_add_completion(ReadLineState *rs, int nb_args, const char *str);
			
 
				 void object_del_completion(ReadLineState *rs, int nb_args, const char *str);
			
 
				 void device_add_completion(ReadLineState *rs, int nb_args, const char *str);
			
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -118,6 +118,10 @@ struct KVMState
 
				     struct KVMDirtyRingReaper reaper;
			
 
				     NotifyVmexitOption notify_vmexit;
			
 
				     uint32_t notify_window;
			
 
				+    uint32_t xen_version;
			
 
				+    uint32_t xen_caps;
			
 
				+    uint16_t xen_gnttab_max_frames;
			
 
				+    uint16_t xen_evtchn_max_pirq;
			
 
				 };
			
 
				 
			
 
				 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
			
--- a/include/sysemu/kvm_xen.h
+++ b/include/sysemu/kvm_xen.h
@@ -0,0 +1,43 @@
 
				+/*
			
 
				+ * Xen HVM emulation support in KVM
			
 
				+ *
			
 
				+ * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ *
			
 
				+ */
			
 
				+
			
 
				+#ifndef QEMU_SYSEMU_KVM_XEN_H
			
 
				+#define QEMU_SYSEMU_KVM_XEN_H
			
 
				+
			
 
				+/* The KVM API uses these to indicate "no GPA" or "no GFN" */
			
 
				+#define INVALID_GPA UINT64_MAX
			
 
				+#define INVALID_GFN UINT64_MAX
			
 
				+
			
 
				+/* QEMU plays the rôle of dom0 for "interdomain" communication. */
			
 
				+#define DOMID_QEMU  0
			
 
				+
			
 
				+int kvm_xen_soft_reset(void);
			
 
				+uint32_t kvm_xen_get_caps(void);
			
 
				+void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id);
			
 
				+void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type);
			
 
				+void kvm_xen_set_callback_asserted(void);
			
 
				+int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port);
			
 
				+uint16_t kvm_xen_get_gnttab_max_frames(void);
			
 
				+uint16_t kvm_xen_get_evtchn_max_pirq(void);
			
 
				+
			
 
				+#define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() &           \
			
 
				+                                 KVM_XEN_HVM_CONFIG_ ## cap))
			
 
				+
			
 
				+#define XEN_SPECIAL_AREA_ADDR 0xfeff8000UL
			
 
				+#define XEN_SPECIAL_AREA_SIZE 0x4000UL
			
 
				+
			
 
				+#define XEN_SPECIALPAGE_CONSOLE     0
			
 
				+#define XEN_SPECIALPAGE_XENSTORE    1
			
 
				+
			
 
				+#define XEN_SPECIAL_PFN(x) ((XEN_SPECIAL_AREA_ADDR >> TARGET_PAGE_BITS) + \
			
 
				+                            XEN_SPECIALPAGE_##x)
			
 
				+
			
 
				+#endif /* QEMU_SYSEMU_KVM_XEN_H */
			
--- a/meson.build
+++ b/meson.build
@@ -2982,6 +2982,7 @@ if have_system
 
				     'hw/i2c',
			
 
				     'hw/i386',
			
 
				     'hw/i386/xen',
			
 
				+    'hw/i386/kvm',
			
 
				     'hw/ide',
			
 
				     'hw/input',
			
 
				     'hw/intc',
			
@@ -3881,6 +3882,7 @@ if have_system
 
				   if xen.found()
			
 
				     summary_info += {'xen ctrl version':  xen.version()}
			
 
				   endif
			
 
				+  summary_info += {'Xen emulation':     config_all.has_key('CONFIG_XEN_EMU')}
			
 
				 endif
			
 
				 summary_info += {'TCG support':       config_all.has_key('CONFIG_TCG')}
			
 
				 if config_all.has_key('CONFIG_TCG')
			
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -155,6 +155,8 @@
 
				 #
			
 
				 # @default-ram-id: the default ID of initial RAM memory backend (since 5.2)
			
 
				 #
			
 
				+# @acpi: machine type supports ACPI (since 8.0)
			
 
				+#
			
 
				 # Since: 1.2
			
 
				 ##
			
 
				 { 'struct': 'MachineInfo',
			
@@ -162,7 +164,7 @@
 
				             '*is-default': 'bool', 'cpu-max': 'int',
			
 
				             'hotpluggable-cpus': 'bool',  'numa-mem-supported': 'bool',
			
 
				             'deprecated': 'bool', '*default-cpu-type': 'str',
			
 
				-            '*default-ram-id': 'str' } }
			
 
				+            '*default-ram-id': 'str', 'acpi': 'bool' } }
			
 
				 
			
 
				 ##
			
 
				 # @query-machines:
			
--- a/qapi/misc-target.json
+++ b/qapi/misc-target.json
@@ -380,3 +380,119 @@
 
				 #
			
 
				 ##
			
 
				 { 'command': 'query-sgx-capabilities', 'returns': 'SGXInfo', 'if': 'TARGET_I386' }
			
 
				+
			
 
				+
			
 
				+##
			
 
				+# @EvtchnPortType:
			
 
				+#
			
 
				+# An enumeration of Xen event channel port types.
			
 
				+#
			
 
				+# @closed: The port is unused.
			
 
				+#
			
 
				+# @unbound: The port is allocated and ready to be bound.
			
 
				+#
			
 
				+# @interdomain: The port is connected as an interdomain interrupt.
			
 
				+#
			
 
				+# @pirq: The port is bound to a physical IRQ (PIRQ).
			
 
				+#
			
 
				+# @virq: The port is bound to a virtual IRQ (VIRQ).
			
 
				+#
			
 
				+# @ipi: The port is an inter-processor interrupt (IPI).
			
 
				+#
			
 
				+# Since: 8.0
			
 
				+##
			
 
				+{ 'enum': 'EvtchnPortType',
			
 
				+  'data': ['closed', 'unbound', 'interdomain', 'pirq', 'virq', 'ipi'],
			
 
				+  'if': 'TARGET_I386' }
			
 
				+
			
 
				+##
			
 
				+# @EvtchnInfo:
			
 
				+#
			
 
				+# Information about a Xen event channel port
			
 
				+#
			
 
				+# @port: the port number
			
 
				+#
			
 
				+# @vcpu: target vCPU for this port
			
 
				+#
			
 
				+# @type: the port type
			
 
				+#
			
 
				+# @remote-domain: remote domain for interdomain ports
			
 
				+#
			
 
				+# @target: remote port ID, or virq/pirq number
			
 
				+#
			
 
				+# @pending: port is currently active pending delivery
			
 
				+#
			
 
				+# @masked: port is masked
			
 
				+#
			
 
				+# Since: 8.0
			
 
				+##
			
 
				+{ 'struct': 'EvtchnInfo',
			
 
				+  'data': {'port': 'uint16',
			
 
				+           'vcpu': 'uint32',
			
 
				+           'type': 'EvtchnPortType',
			
 
				+           'remote-domain': 'str',
			
 
				+           'target': 'uint16',
			
 
				+           'pending': 'bool',
			
 
				+           'masked': 'bool'},
			
 
				+  'if': 'TARGET_I386' }
			
 
				+
			
 
				+
			
 
				+##
			
 
				+# @xen-event-list:
			
 
				+#
			
 
				+# Query the Xen event channels opened by the guest.
			
 
				+#
			
 
				+# Returns: list of open event channel ports.
			
 
				+#
			
 
				+# Since: 8.0
			
 
				+#
			
 
				+# Example:
			
 
				+#
			
 
				+# -> { "execute": "xen-event-list" }
			
 
				+# <- { "return": [
			
 
				+#         {
			
 
				+#             "pending": false,
			
 
				+#             "port": 1,
			
 
				+#             "vcpu": 1,
			
 
				+#             "remote-domain": "qemu",
			
 
				+#             "masked": false,
			
 
				+#             "type": "interdomain",
			
 
				+#             "target": 1
			
 
				+#         },
			
 
				+#         {
			
 
				+#             "pending": false,
			
 
				+#             "port": 2,
			
 
				+#             "vcpu": 0,
			
 
				+#             "remote-domain": "",
			
 
				+#             "masked": false,
			
 
				+#             "type": "virq",
			
 
				+#             "target": 0
			
 
				+#         }
			
 
				+#      ]
			
 
				+#    }
			
 
				+#
			
 
				+##
			
 
				+{ 'command': 'xen-event-list',
			
 
				+  'returns': ['EvtchnInfo'],
			
 
				+  'if': 'TARGET_I386' }
			
 
				+
			
 
				+##
			
 
				+# @xen-event-inject:
			
 
				+#
			
 
				+# Inject a Xen event channel port (interrupt) to the guest.
			
 
				+#
			
 
				+# @port: The port number
			
 
				+#
			
 
				+# Returns: - Nothing on success.
			
 
				+#
			
 
				+# Since: 8.0
			
 
				+#
			
 
				+# Example:
			
 
				+#
			
 
				+# -> { "execute": "xen-event-inject", "arguments": { "port": 1 } }
			
 
				+# <- { "return": { } }
			
 
				+#
			
 
				+##
			
 
				+{ 'command': 'xen-event-inject',
			
 
				+  'data': { 'port': 'uint32' },
			
 
				+  'if': 'TARGET_I386' }
			
--- a/softmmu/globals.c
+++ b/softmmu/globals.c
@@ -63,5 +63,5 @@ QemuUUID qemu_uuid;
 
				 bool qemu_uuid_set;
			
 
				 
			
 
				 uint32_t xen_domid;
			
 
				-enum xen_mode xen_mode = XEN_EMULATE;
			
 
				+enum xen_mode xen_mode = XEN_DISABLED;
			
 
				 bool xen_domid_restrict;
			
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -3360,7 +3360,7 @@ void qemu_init(int argc, char **argv)
 
				                 has_defaults = 0;
			
 
				                 break;
			
 
				             case QEMU_OPTION_xen_domid:
			
 
				-                if (!(accel_find("xen"))) {
			
 
				+                if (!(accel_find("xen")) && !(accel_find("kvm"))) {
			
 
				                     error_report("Option not supported for this target");
			
 
				                     exit(1);
			
 
				                 }
			
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7209,6 +7209,7 @@ static Property x86_cpu_properties[] = {
 
				      * own cache information (see x86_cpu_load_def()).
			
 
				      */
			
 
				     DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true),
			
 
				+    DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false),
			
 
				 
			
 
				     /*
			
 
				      * From "Requirements for Implementing the Microsoft
			
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -26,6 +26,9 @@
 
				 #include "exec/cpu-defs.h"
			
 
				 #include "qapi/qapi-types-common.h"
			
 
				 #include "qemu/cpu-float.h"
			
 
				+#include "qemu/timer.h"
			
 
				+
			
 
				+#define XEN_NR_VIRQS 24
			
 
				 
			
 
				 /* The x86 has a strong memory model with some store-after-load re-ordering */
			
 
				 #define TCG_GUEST_DEFAULT_MO      (TCG_MO_ALL & ~TCG_MO_ST_LD)
			
@@ -1799,6 +1802,20 @@ typedef struct CPUArchState {
 
				 #endif
			
 
				 #if defined(CONFIG_KVM)
			
 
				     struct kvm_nested_state *nested_state;
			
 
				+    MemoryRegion *xen_vcpu_info_mr;
			
 
				+    void *xen_vcpu_info_hva;
			
 
				+    uint64_t xen_vcpu_info_gpa;
			
 
				+    uint64_t xen_vcpu_info_default_gpa;
			
 
				+    uint64_t xen_vcpu_time_info_gpa;
			
 
				+    uint64_t xen_vcpu_runstate_gpa;
			
 
				+    uint8_t xen_vcpu_callback_vector;
			
 
				+    bool xen_callback_asserted;
			
 
				+    uint16_t xen_virq[XEN_NR_VIRQS];
			
 
				+    uint64_t xen_singleshot_timer_ns;
			
 
				+    QEMUTimer *xen_singleshot_timer;
			
 
				+    uint64_t xen_periodic_timer_period;
			
 
				+    QEMUTimer *xen_periodic_timer;
			
 
				+    QemuMutex xen_timers_lock;
			
 
				 #endif
			
 
				 #if defined(CONFIG_HVF)
			
 
				     HVFX86LazyFlags hvf_lflags;
			
@@ -1975,6 +1992,8 @@ struct ArchCPU {
 
				     int32_t thread_id;
			
 
				 
			
 
				     int32_t hv_max_vps;
			
 
				+
			
 
				+    bool xen_vapic;
			
 
				 };
			
 
				 
			
 
				 
			
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -22,6 +22,7 @@
 
				 
			
 
				 #include <linux/kvm.h>
			
 
				 #include "standard-headers/asm-x86/kvm_para.h"
			
 
				+#include "hw/xen/interface/arch-x86/cpuid.h"
			
 
				 
			
 
				 #include "cpu.h"
			
 
				 #include "host-cpu.h"
			
@@ -31,6 +32,7 @@
 
				 #include "sysemu/runstate.h"
			
 
				 #include "kvm_i386.h"
			
 
				 #include "sev.h"
			
 
				+#include "xen-emu.h"
			
 
				 #include "hyperv.h"
			
 
				 #include "hyperv-proto.h"
			
 
				 
			
@@ -42,6 +44,8 @@
 
				 #include "qemu/error-report.h"
			
 
				 #include "qemu/memalign.h"
			
 
				 #include "hw/i386/x86.h"
			
 
				+#include "hw/i386/kvm/xen_evtchn.h"
			
 
				+#include "hw/i386/pc.h"
			
 
				 #include "hw/i386/apic.h"
			
 
				 #include "hw/i386/apic_internal.h"
			
 
				 #include "hw/i386/apic-msidef.h"
			
@@ -49,6 +53,8 @@
 
				 #include "hw/i386/x86-iommu.h"
			
 
				 #include "hw/i386/e820_memory_layout.h"
			
 
				 
			
 
				+#include "hw/xen/xen.h"
			
 
				+
			
 
				 #include "hw/pci/pci.h"
			
 
				 #include "hw/pci/msi.h"
			
 
				 #include "hw/pci/msix.h"
			
@@ -1815,7 +1821,82 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
				         has_msr_hv_hypercall = true;
			
 
				     }
			
 
				 
			
 
				-    if (cpu->expose_kvm) {
			
 
				+    if (cs->kvm_state->xen_version) {
			
 
				+#ifdef CONFIG_XEN_EMU
			
 
				+        struct kvm_cpuid_entry2 *xen_max_leaf;
			
 
				+
			
 
				+        memcpy(signature, "XenVMMXenVMM", 12);
			
 
				+
			
 
				+        xen_max_leaf = c = &cpuid_data.entries[cpuid_i++];
			
 
				+        c->function = kvm_base + XEN_CPUID_SIGNATURE;
			
 
				+        c->eax = kvm_base + XEN_CPUID_TIME;
			
 
				+        c->ebx = signature[0];
			
 
				+        c->ecx = signature[1];
			
 
				+        c->edx = signature[2];
			
 
				+
			
 
				+        c = &cpuid_data.entries[cpuid_i++];
			
 
				+        c->function = kvm_base + XEN_CPUID_VENDOR;
			
 
				+        c->eax = cs->kvm_state->xen_version;
			
 
				+        c->ebx = 0;
			
 
				+        c->ecx = 0;
			
 
				+        c->edx = 0;
			
 
				+
			
 
				+        c = &cpuid_data.entries[cpuid_i++];
			
 
				+        c->function = kvm_base + XEN_CPUID_HVM_MSR;
			
 
				+        /* Number of hypercall-transfer pages */
			
 
				+        c->eax = 1;
			
 
				+        /* Hypercall MSR base address */
			
 
				+        if (hyperv_enabled(cpu)) {
			
 
				+            c->ebx = XEN_HYPERCALL_MSR_HYPERV;
			
 
				+            kvm_xen_init(cs->kvm_state, c->ebx);
			
 
				+        } else {
			
 
				+            c->ebx = XEN_HYPERCALL_MSR;
			
 
				+        }
			
 
				+        c->ecx = 0;
			
 
				+        c->edx = 0;
			
 
				+
			
 
				+        c = &cpuid_data.entries[cpuid_i++];
			
 
				+        c->function = kvm_base + XEN_CPUID_TIME;
			
 
				+        c->eax = ((!!tsc_is_stable_and_known(env) << 1) |
			
 
				+            (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2));
			
 
				+        /* default=0 (emulate if necessary) */
			
 
				+        c->ebx = 0;
			
 
				+        /* guest tsc frequency */
			
 
				+        c->ecx = env->user_tsc_khz;
			
 
				+        /* guest tsc incarnation (migration count) */
			
 
				+        c->edx = 0;
			
 
				+
			
 
				+        c = &cpuid_data.entries[cpuid_i++];
			
 
				+        c->function = kvm_base + XEN_CPUID_HVM;
			
 
				+        xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM;
			
 
				+        if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) {
			
 
				+            c->function = kvm_base + XEN_CPUID_HVM;
			
 
				+
			
 
				+            if (cpu->xen_vapic) {
			
 
				+                c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
			
 
				+                c->eax |= XEN_HVM_CPUID_X2APIC_VIRT;
			
 
				+            }
			
 
				+
			
 
				+            c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS;
			
 
				+
			
 
				+            if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) {
			
 
				+                c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
			
 
				+                c->ebx = cs->cpu_index;
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        r = kvm_xen_init_vcpu(cs);
			
 
				+        if (r) {
			
 
				+            return r;
			
 
				+        }
			
 
				+
			
 
				+        kvm_base += 0x100;
			
 
				+#else /* CONFIG_XEN_EMU */
			
 
				+        /* This should never happen as kvm_arch_init() would have died first. */
			
 
				+        fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n");
			
 
				+        abort();
			
 
				+#endif
			
 
				+    } else if (cpu->expose_kvm) {
			
 
				         memcpy(signature, "KVMKVMKVM\0\0\0", 12);
			
 
				         c = &cpuid_data.entries[cpuid_i++];
			
 
				         c->function = KVM_CPUID_SIGNATURE | kvm_base;
			
@@ -2529,6 +2610,24 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
 
				         }
			
 
				     }
			
 
				 
			
 
				+    if (s->xen_version) {
			
 
				+#ifdef CONFIG_XEN_EMU
			
 
				+        if (!object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)) {
			
 
				+            error_report("kvm: Xen support only available in PC machine");
			
 
				+            return -ENOTSUP;
			
 
				+        }
			
 
				+        /* hyperv_enabled() doesn't work yet. */
			
 
				+        uint32_t msr = XEN_HYPERCALL_MSR;
			
 
				+        ret = kvm_xen_init(s, msr);
			
 
				+        if (ret < 0) {
			
 
				+            return ret;
			
 
				+        }
			
 
				+#else
			
 
				+        error_report("kvm: Xen support not enabled in qemu");
			
 
				+        return -ENOTSUP;
			
 
				+#endif
			
 
				+    }
			
 
				+
			
 
				     ret = kvm_get_supported_msrs(s);
			
 
				     if (ret < 0) {
			
 
				         return ret;
			
@@ -4652,6 +4751,15 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
 
				         kvm_arch_set_tsc_khz(cpu);
			
 
				     }
			
 
				 
			
 
				+#ifdef CONFIG_XEN_EMU
			
 
				+    if (xen_mode == XEN_EMULATE && level == KVM_PUT_FULL_STATE) {
			
 
				+        ret = kvm_put_xen_state(cpu);
			
 
				+        if (ret < 0) {
			
 
				+            return ret;
			
 
				+        }
			
 
				+    }
			
 
				+#endif
			
 
				+
			
 
				     ret = kvm_getput_regs(x86_cpu, 1);
			
 
				     if (ret < 0) {
			
 
				         return ret;
			
@@ -4751,6 +4859,14 @@ int kvm_arch_get_registers(CPUState *cs)
 
				     if (ret < 0) {
			
 
				         goto out;
			
 
				     }
			
 
				+#ifdef CONFIG_XEN_EMU
			
 
				+    if (xen_mode == XEN_EMULATE) {
			
 
				+        ret = kvm_get_xen_state(cs);
			
 
				+        if (ret < 0) {
			
 
				+            goto out;
			
 
				+        }
			
 
				+    }
			
 
				+#endif
			
 
				     ret = 0;
			
 
				  out:
			
 
				     cpu_sync_bndcs_hflags(&cpu->env);
			
@@ -4875,6 +4991,17 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
 
				         kvm_rate_limit_on_bus_lock();
			
 
				     }
			
 
				 
			
 
				+    /*
			
 
				+     * If the callback is asserted as a GSI (or PCI INTx) then check if
			
 
				+     * vcpu_info->evtchn_upcall_pending has been cleared, and deassert
			
 
				+     * the callback IRQ if so. Ideally we could hook into the PIC/IOAPIC
			
 
				+     * EOI and only resample then, exactly how the VFIO eventfd pairs
			
 
				+     * are designed to work for level triggered interrupts.
			
 
				+     */
			
 
				+    if (x86_cpu->env.xen_callback_asserted) {
			
 
				+        kvm_xen_maybe_deassert_callback(cpu);
			
 
				+    }
			
 
				+
			
 
				     /* We need to protect the apic state against concurrent accesses from
			
 
				      * different threads in case the userspace irqchip is used. */
			
 
				     if (!kvm_irqchip_in_kernel()) {
			
@@ -5395,6 +5522,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 
				         assert(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER);
			
 
				         ret = kvm_handle_wrmsr(cpu, run);
			
 
				         break;
			
 
				+#ifdef CONFIG_XEN_EMU
			
 
				+    case KVM_EXIT_XEN:
			
 
				+        ret = kvm_xen_handle_exit(cpu, &run->xen);
			
 
				+        break;
			
 
				+#endif
			
 
				     default:
			
 
				         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
			
 
				         ret = -1;
			
@@ -5523,6 +5655,20 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
 
				         }
			
 
				     }
			
 
				 
			
 
				+#ifdef CONFIG_XEN_EMU
			
 
				+    if (xen_mode == XEN_EMULATE) {
			
 
				+        int handled = xen_evtchn_translate_pirq_msi(route, address, data);
			
 
				+
			
 
				+        /*
			
 
				+         * If it was a PIRQ and successfully routed (handled == 0) or it was
			
 
				+         * an error (handled < 0), return. If it wasn't a PIRQ, keep going.
			
 
				+         */
			
 
				+        if (handled <= 0) {
			
 
				+            return handled;
			
 
				+        }
			
 
				+    }
			
 
				+#endif
			
 
				+
			
 
				     address = kvm_swizzle_msi_ext_dest_id(address);
			
 
				     route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT;
			
 
				     route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK;
			
@@ -5542,8 +5688,8 @@ struct MSIRouteEntry {
 
				 static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \
			
 
				     QLIST_HEAD_INITIALIZER(msi_route_list);
			
 
				 
			
 
				-static void kvm_update_msi_routes_all(void *private, bool global,
			
 
				-                                      uint32_t index, uint32_t mask)
			
 
				+void kvm_update_msi_routes_all(void *private, bool global,
			
 
				+                               uint32_t index, uint32_t mask)
			
 
				 {
			
 
				     int cnt = 0, vector;
			
 
				     MSIRouteEntry *entry;
			
@@ -5719,6 +5865,90 @@ static void kvm_arch_set_notify_window(Object *obj, Visitor *v,
 
				     s->notify_window = value;
			
 
				 }
			
 
				 
			
 
				+static void kvm_arch_get_xen_version(Object *obj, Visitor *v,
			
 
				+                                     const char *name, void *opaque,
			
 
				+                                     Error **errp)
			
 
				+{
			
 
				+    KVMState *s = KVM_STATE(obj);
			
 
				+    uint32_t value = s->xen_version;
			
 
				+
			
 
				+    visit_type_uint32(v, name, &value, errp);
			
 
				+}
			
 
				+
			
 
				+static void kvm_arch_set_xen_version(Object *obj, Visitor *v,
			
 
				+                                     const char *name, void *opaque,
			
 
				+                                     Error **errp)
			
 
				+{
			
 
				+    KVMState *s = KVM_STATE(obj);
			
 
				+    Error *error = NULL;
			
 
				+    uint32_t value;
			
 
				+
			
 
				+    visit_type_uint32(v, name, &value, &error);
			
 
				+    if (error) {
			
 
				+        error_propagate(errp, error);
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    s->xen_version = value;
			
 
				+    if (value && xen_mode == XEN_DISABLED) {
			
 
				+        xen_mode = XEN_EMULATE;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+static void kvm_arch_get_xen_gnttab_max_frames(Object *obj, Visitor *v,
			
 
				+                                               const char *name, void *opaque,
			
 
				+                                               Error **errp)
			
 
				+{
			
 
				+    KVMState *s = KVM_STATE(obj);
			
 
				+    uint16_t value = s->xen_gnttab_max_frames;
			
 
				+
			
 
				+    visit_type_uint16(v, name, &value, errp);
			
 
				+}
			
 
				+
			
 
				+static void kvm_arch_set_xen_gnttab_max_frames(Object *obj, Visitor *v,
			
 
				+                                               const char *name, void *opaque,
			
 
				+                                               Error **errp)
			
 
				+{
			
 
				+    KVMState *s = KVM_STATE(obj);
			
 
				+    Error *error = NULL;
			
 
				+    uint16_t value;
			
 
				+
			
 
				+    visit_type_uint16(v, name, &value, &error);
			
 
				+    if (error) {
			
 
				+        error_propagate(errp, error);
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    s->xen_gnttab_max_frames = value;
			
 
				+}
			
 
				+
			
 
				+static void kvm_arch_get_xen_evtchn_max_pirq(Object *obj, Visitor *v,
			
 
				+                                             const char *name, void *opaque,
			
 
				+                                             Error **errp)
			
 
				+{
			
 
				+    KVMState *s = KVM_STATE(obj);
			
 
				+    uint16_t value = s->xen_evtchn_max_pirq;
			
 
				+
			
 
				+    visit_type_uint16(v, name, &value, errp);
			
 
				+}
			
 
				+
			
 
				+static void kvm_arch_set_xen_evtchn_max_pirq(Object *obj, Visitor *v,
			
 
				+                                             const char *name, void *opaque,
			
 
				+                                             Error **errp)
			
 
				+{
			
 
				+    KVMState *s = KVM_STATE(obj);
			
 
				+    Error *error = NULL;
			
 
				+    uint16_t value;
			
 
				+
			
 
				+    visit_type_uint16(v, name, &value, &error);
			
 
				+    if (error) {
			
 
				+        error_propagate(errp, error);
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    s->xen_evtchn_max_pirq = value;
			
 
				+}
			
 
				+
			
 
				 void kvm_arch_accel_class_init(ObjectClass *oc)
			
 
				 {
			
 
				     object_class_property_add_enum(oc, "notify-vmexit", "NotifyVMexitOption",
			
@@ -5735,6 +5965,29 @@ void kvm_arch_accel_class_init(ObjectClass *oc)
 
				     object_class_property_set_description(oc, "notify-window",
			
 
				                                           "Clock cycles without an event window "
			
 
				                                           "after which a notification VM exit occurs");
			
 
				+
			
 
				+    object_class_property_add(oc, "xen-version", "uint32",
			
 
				+                              kvm_arch_get_xen_version,
			
 
				+                              kvm_arch_set_xen_version,
			
 
				+                              NULL, NULL);
			
 
				+    object_class_property_set_description(oc, "xen-version",
			
 
				+                                          "Xen version to be emulated "
			
 
				+                                          "(in XENVER_version form "
			
 
				+                                          "e.g. 0x4000a for 4.10)");
			
 
				+
			
 
				+    object_class_property_add(oc, "xen-gnttab-max-frames", "uint16",
			
 
				+                              kvm_arch_get_xen_gnttab_max_frames,
			
 
				+                              kvm_arch_set_xen_gnttab_max_frames,
			
 
				+                              NULL, NULL);
			
 
				+    object_class_property_set_description(oc, "xen-gnttab-max-frames",
			
 
				+                                          "Maximum number of grant table frames");
			
 
				+
			
 
				+    object_class_property_add(oc, "xen-evtchn-max-pirq", "uint16",
			
 
				+                              kvm_arch_get_xen_evtchn_max_pirq,
			
 
				+                              kvm_arch_set_xen_evtchn_max_pirq,
			
 
				+                              NULL, NULL);
			
 
				+    object_class_property_set_description(oc, "xen-evtchn-max-pirq",
			
 
				+                                          "Maximum number of Xen PIRQs");
			
 
				 }
			
 
				 
			
 
				 void kvm_set_max_apic_id(uint32_t max_apic_id)
			
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -51,6 +51,8 @@ bool kvm_hv_vpindex_settable(void);
 
				 bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
			
 
				 
			
 
				 uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
			
 
				+void kvm_update_msi_routes_all(void *private, bool global,
			
 
				+                               uint32_t index, uint32_t mask);
			
 
				 
			
 
				 bool kvm_enable_sgx_provisioning(KVMState *s);
			
 
				 void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
			
--- a/target/i386/kvm/meson.build
+++ b/target/i386/kvm/meson.build
@@ -7,6 +7,8 @@ i386_softmmu_kvm_ss.add(files(
 
				   'kvm-cpu.c',
			
 
				 ))
			
 
				 
			
 
				+i386_softmmu_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
			
 
				+
			
 
				 i386_softmmu_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c'))
			
 
				 
			
 
				 i386_softmmu_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
			
--- a/target/i386/kvm/trace-events
+++ b/target/i386/kvm/trace-events
@@ -5,3 +5,10 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %"
 
				 kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d"
			
 
				 kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d"
			
 
				 kvm_x86_update_msi_routes(int num) "Updated %d MSI routes"
			
 
				+
			
 
				+# xen-emu.c
			
 
				+kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64
			
 
				+kvm_xen_soft_reset(void) ""
			
 
				+kvm_xen_set_shared_info(uint64_t gfn) "shared info at gfn 0x%" PRIx64
			
 
				+kvm_xen_set_vcpu_attr(int cpu, int type, uint64_t gpa) "vcpu attr cpu %d type %d gpa 0x%" PRIx64
			
 
				+kvm_xen_set_vcpu_callback(int cpu, int vector) "callback vcpu %d vector %d"
			
--- a/target/i386/kvm/xen-compat.h
+++ b/target/i386/kvm/xen-compat.h
@@ -0,0 +1,70 @@
 
				+/*
			
 
				+ * Xen HVM emulation support in KVM
			
 
				+ *
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ *
			
 
				+ */
			
 
				+
			
 
				+#ifndef QEMU_I386_KVM_XEN_COMPAT_H
			
 
				+#define QEMU_I386_KVM_XEN_COMPAT_H
			
 
				+
			
 
				+#include "hw/xen/interface/memory.h"
			
 
				+
			
 
				+typedef uint32_t compat_pfn_t;
			
 
				+typedef uint32_t compat_ulong_t;
			
 
				+typedef uint32_t compat_ptr_t;
			
 
				+
			
 
				+#define __DEFINE_COMPAT_HANDLE(name, type)      \
			
 
				+    typedef struct {                            \
			
 
				+        compat_ptr_t c;                         \
			
 
				+        type *_[0] __attribute__((packed));   \
			
 
				+    } __compat_handle_ ## name;                 \
			
 
				+
			
 
				+#define DEFINE_COMPAT_HANDLE(name) __DEFINE_COMPAT_HANDLE(name, name)
			
 
				+#define COMPAT_HANDLE(name) __compat_handle_ ## name
			
 
				+
			
 
				+DEFINE_COMPAT_HANDLE(compat_pfn_t);
			
 
				+DEFINE_COMPAT_HANDLE(compat_ulong_t);
			
 
				+DEFINE_COMPAT_HANDLE(int);
			
 
				+
			
 
				+struct compat_xen_add_to_physmap {
			
 
				+    domid_t domid;
			
 
				+    uint16_t size;
			
 
				+    unsigned int space;
			
 
				+    compat_ulong_t idx;
			
 
				+    compat_pfn_t gpfn;
			
 
				+};
			
 
				+
			
 
				+struct compat_xen_add_to_physmap_batch {
			
 
				+    domid_t domid;
			
 
				+    uint16_t space;
			
 
				+    uint16_t size;
			
 
				+    uint16_t extra;
			
 
				+    COMPAT_HANDLE(compat_ulong_t) idxs;
			
 
				+    COMPAT_HANDLE(compat_pfn_t) gpfns;
			
 
				+    COMPAT_HANDLE(int) errs;
			
 
				+};
			
 
				+
			
 
				+struct compat_physdev_map_pirq {
			
 
				+    domid_t domid;
			
 
				+    uint16_t pad;
			
 
				+    /* IN */
			
 
				+    int type;
			
 
				+    /* IN (ignored for ..._MULTI_MSI) */
			
 
				+    int index;
			
 
				+    /* IN or OUT */
			
 
				+    int pirq;
			
 
				+    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
			
 
				+    int bus;
			
 
				+    /* IN */
			
 
				+    int devfn;
			
 
				+    /* IN (also OUT for ..._MULTI_MSI) */
			
 
				+    int entry_nr;
			
 
				+    /* IN */
			
 
				+    uint64_t table_base;
			
 
				+} __attribute__((packed));
			
 
				+
			
 
				+#endif /* QEMU_I386_KVM_XEN_COMPAT_H */
			
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -0,0 +1,1897 @@
 
				+/*
			
 
				+ * Xen HVM emulation support in KVM
			
 
				+ *
			
 
				+ * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ *
			
 
				+ */
			
 
				+
			
 
				+#include "qemu/osdep.h"
			
 
				+#include "qemu/log.h"
			
 
				+#include "qemu/main-loop.h"
			
 
				+#include "hw/xen/xen.h"
			
 
				+#include "sysemu/kvm_int.h"
			
 
				+#include "sysemu/kvm_xen.h"
			
 
				+#include "kvm/kvm_i386.h"
			
 
				+#include "exec/address-spaces.h"
			
 
				+#include "xen-emu.h"
			
 
				+#include "trace.h"
			
 
				+#include "sysemu/runstate.h"
			
 
				+
			
 
				+#include "hw/pci/msi.h"
			
 
				+#include "hw/i386/apic-msidef.h"
			
 
				+#include "hw/i386/e820_memory_layout.h"
			
 
				+#include "hw/i386/kvm/xen_overlay.h"
			
 
				+#include "hw/i386/kvm/xen_evtchn.h"
			
 
				+#include "hw/i386/kvm/xen_gnttab.h"
			
 
				+#include "hw/i386/kvm/xen_xenstore.h"
			
 
				+
			
 
				+#include "hw/xen/interface/version.h"
			
 
				+#include "hw/xen/interface/sched.h"
			
 
				+#include "hw/xen/interface/memory.h"
			
 
				+#include "hw/xen/interface/hvm/hvm_op.h"
			
 
				+#include "hw/xen/interface/hvm/params.h"
			
 
				+#include "hw/xen/interface/vcpu.h"
			
 
				+#include "hw/xen/interface/event_channel.h"
			
 
				+#include "hw/xen/interface/grant_table.h"
			
 
				+
			
 
				+#include "xen-compat.h"
			
 
				+
			
 
				+static void xen_vcpu_singleshot_timer_event(void *opaque);
			
 
				+static void xen_vcpu_periodic_timer_event(void *opaque);
			
 
				+
			
 
				+/*
				+ * True when a hypercall must be decoded with the 32-bit compat structure
				+ * layouts. On a 64-bit target that is whenever the hypercall was not made
				+ * in long mode; on a 32-bit target it is never the case.
				+ */
				+#ifdef TARGET_X86_64
				+#define hypercall_compat32(longmode) (!(longmode))
				+#else
				+#define hypercall_compat32(longmode) (false)
				+#endif
			
 
				+
			
 
				+/*
				+ * Translate the guest virtual address @gva to a guest physical address
				+ * using the KVM_TRANSLATE vCPU ioctl.
				+ *
				+ * If @len is non-NULL it is always set (even on failure) to the number of
				+ * bytes from @gva up to the end of its page. Returns false when the ioctl
				+ * fails, the translation is not valid, or @is_write is set and the mapping
				+ * is not writeable; in that case *@gpa is left untouched.
				+ */
				+static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
				+                           size_t *len, bool is_write)
				+{
				+    struct kvm_translation xlate = {
				+        .linear_address = gva,
				+    };
				+
				+    if (len) {
				+        *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
				+    }
				+
				+    if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &xlate)) {
				+        return false;
				+    }
				+    if (!xlate.valid) {
				+        return false;
				+    }
				+    if (is_write && !xlate.writeable) {
				+        return false;
				+    }
				+
				+    *gpa = xlate.physical_address;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * Copy @sz bytes between the host buffer @_buf and guest virtual address
				+ * @gva, one page-bounded chunk at a time. The direction is chosen by
				+ * @is_write (true: host buffer -> guest).
				+ *
				+ * Returns 0 on success or -EFAULT as soon as a chunk cannot be translated;
				+ * chunks already transferred before the failure are not undone.
				+ */
				+static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
				+                      bool is_write)
				+{
				+    uint8_t *cursor = (uint8_t *)_buf;
				+
				+    while (sz > 0) {
				+        uint64_t gpa;
				+        size_t chunk;
				+
				+        if (!kvm_gva_to_gpa(cs, gva, &gpa, &chunk, is_write)) {
				+            return -EFAULT;
				+        }
				+
				+        /* Never run past the end of the caller's request. */
				+        chunk = (chunk > sz) ? sz : chunk;
				+
				+        cpu_physical_memory_rw(gpa, cursor, chunk, is_write);
				+
				+        gva += chunk;
				+        cursor += chunk;
				+        sz -= chunk;
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/* Read @sz bytes from guest virtual address @gva into @buf; 0 or -EFAULT. */
				+static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
				+                                    size_t sz)
				+{
				+    const bool to_guest = false;
				+
				+    return kvm_gva_rw(cs, gva, buf, sz, to_guest);
				+}
			
 
				+
			
 
				+/* Write @sz bytes from @buf to guest virtual address @gva; 0 or -EFAULT. */
				+static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
				+                                  size_t sz)
				+{
				+    const bool to_guest = true;
				+
				+    return kvm_gva_rw(cs, gva, buf, sz, to_guest);
				+}
			
 
				+
			
 
				+/*
				+ * Enable Xen HVM emulation in KVM for this VM.
				+ *
				+ * @s:             the KVM state; s->xen_version is passed to the kernel and
				+ *                 s->xen_caps is filled in on the first successful call.
				+ * @hypercall_msr: MSR number handed to KVM_XEN_HVM_CONFIG.
				+ *
				+ * Returns 0 on success, -ENOSYS if the kernel lacks a required capability,
				+ * -EINVAL if the split irqchip is not in use, or the negative error from
				+ * KVM_XEN_HVM_CONFIG / e820_add_entry(). Every failure is reported through
				+ * error_report().
				+ */
				+int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
				+{
				+    const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
				+        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
				+    struct kvm_xen_hvm_config cfg = {
				+        .msr = hypercall_msr,
				+        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
				+    };
				+    int xen_caps, ret;
				+
				+    xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
				+    if (required_caps & ~xen_caps) {
				+        error_report("kvm: Xen HVM guest support not present or insufficient");
				+        return -ENOSYS;
				+    }
				+
				+    if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
				+        struct kvm_xen_hvm_attr ha = {
				+            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
				+            .u.xen_version = s->xen_version,
				+        };
				+        /* The result is deliberately ignored: failure here is not fatal. */
				+        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
				+
				+        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
				+    }
				+
				+    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
				+    if (ret < 0) {
				+        error_report("kvm: Failed to enable Xen HVM support: %s",
				+                     strerror(-ret));
				+        return ret;
				+    }
				+
				+    /* If called a second time, don't repeat the rest of the setup. */
				+    if (s->xen_caps) {
				+        return 0;
				+    }
				+
				+    /*
				+     * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
				+     * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
				+     *
				+     * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
				+     * such things to be polled at precisely the right time. We *could* do
				+     * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
				+     * the moment the IRQ is acked, and see if it should be reasserted.
				+     *
				+     * But the in-kernel irqchip is deprecated, so we're unlikely to add
				+     * that support in the kernel. Insist on using the split irqchip mode
				+     * instead.
				+     *
				+     * This leaves us polling for the level going low in QEMU, which lacks
				+     * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
				+     * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
				+     * the device (for which it has to unmap the device and trap access, for
				+     * some period after an IRQ!!). In the Xen case, we do it on exit from
				+     * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
				+     * Which is kind of icky, but less so than the VFIO one. I may fix them
				+     * both later...
				+     */
				+    if (!kvm_kernel_irqchip_split()) {
				+        error_report("kvm: Xen support requires kernel-irqchip=split");
				+        return -EINVAL;
				+    }
				+
				+    s->xen_caps = xen_caps;
				+
				+    /* Tell fw_cfg to notify the BIOS to reserve the range. */
				+    ret = e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE,
				+                         E820_RESERVED);
				+    if (ret < 0) {
				+        /* Use error_report() like every other failure path here. */
				+        error_report("e820_add_entry() table is full");
				+        return ret;
				+    }
				+
				+    /* The page couldn't be overlaid until KVM was initialized */
				+    xen_xenstore_reset();
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Per-vCPU Xen setup: tell the kernel the Xen vCPU ID (when EVTCHN_SEND is
				+ * available), mark all guest-registered addresses as invalid and create the
				+ * single-shot and periodic timers.
				+ *
				+ * Returns 0 on success, the ioctl error if setting the vCPU ID fails, or
				+ * -ENOMEM if a timer could not be created.
				+ */
				+int kvm_xen_init_vcpu(CPUState *cs)
				+{
				+    X86CPU *x86 = X86_CPU(cs);
				+    CPUX86State *env = &x86->env;
				+
				+    /*
				+     * Hypercalls such as the timer ops identify vCPUs by their Xen/ACPI
				+     * vCPU ID, so the kernel must be told what it is. That ID is not the
				+     * APIC ID normally used when talking to the kernel about vCPUs, and
				+     * since vCPU threads may create their KVM vCPUs out of order it need
				+     * not match the kernel's internal vCPU index either.
				+     */
				+    if (kvm_xen_has_cap(EVTCHN_SEND)) {
				+        struct kvm_xen_vcpu_attr id_attr = {
				+            .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
				+            .u.vcpu_id = cs->cpu_index,
				+        };
				+        int rc = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &id_attr);
				+
				+        if (rc) {
				+            error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
				+                         strerror(-rc));
				+            return rc;
				+        }
				+    }
				+
				+    /* Nothing has been registered by the guest yet. */
				+    env->xen_vcpu_info_gpa = INVALID_GPA;
				+    env->xen_vcpu_info_default_gpa = INVALID_GPA;
				+    env->xen_vcpu_time_info_gpa = INVALID_GPA;
				+    env->xen_vcpu_runstate_gpa = INVALID_GPA;
				+
				+    qemu_mutex_init(&env->xen_timers_lock);
				+
				+    env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
				+                                             xen_vcpu_singleshot_timer_event,
				+                                             x86);
				+    if (env->xen_singleshot_timer == NULL) {
				+        return -ENOMEM;
				+    }
				+    env->xen_singleshot_timer->opaque = cs;
				+
				+    env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
				+                                           xen_vcpu_periodic_timer_event,
				+                                           x86);
				+    if (env->xen_periodic_timer == NULL) {
				+        return -ENOMEM;
				+    }
				+    env->xen_periodic_timer->opaque = cs;
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/* Return the Xen capability bits recorded in the global KVM state. */
				+uint32_t kvm_xen_get_caps(void)
				+{
				+    uint32_t caps = kvm_state->xen_caps;
				+
				+    return caps;
				+}
			
 
				+
			
 
				+/*
				+ * Handle the xen_version hypercall. Only XENVER_get_features is implemented:
				+ * it reads the guest's xen_feature_info at @arg, fills in the feature bitmap
				+ * for submap index 0 (all other indices report no features) and writes the
				+ * structure back.
				+ *
				+ * Returns false for any other @cmd so the caller can treat it as unhandled;
				+ * otherwise stores the result in @exit and returns true.
				+ */
				+static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                     int cmd, uint64_t arg)
				+{
				+    struct xen_feature_info fi;
				+    int rc;
				+
				+    if (cmd != XENVER_get_features) {
				+        return false;
				+    }
				+
				+    /* Same layout for 32-bit and 64-bit guests, so no compat handling. */
				+    qemu_build_assert(sizeof(fi) == 8);
				+
				+    rc = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
				+    if (!rc) {
				+        fi.submap = 0;
				+        if (fi.submap_idx == 0) {
				+            fi.submap |= 1 << XENFEAT_writable_page_tables |
				+                         1 << XENFEAT_writable_descriptor_tables |
				+                         1 << XENFEAT_auto_translated_physmap |
				+                         1 << XENFEAT_supervisor_mode_kernel |
				+                         1 << XENFEAT_hvm_callback_vector |
				+                         1 << XENFEAT_hvm_safe_pvclock |
				+                         1 << XENFEAT_hvm_pirqs;
				+        }
				+
				+        rc = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
				+    }
				+
				+    exit->u.hcall.result = rc;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * Set a GPA-valued Xen vCPU attribute of kind @type to @gpa via
				+ * KVM_XEN_VCPU_SET_ATTR, tracing the request first. Returns the ioctl result.
				+ */
				+static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
				+{
				+    struct kvm_xen_vcpu_attr attr = {
				+        .type = type,
				+        .u.gpa = gpa,
				+    };
				+
				+    trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
				+
				+    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &attr);
				+}
			
 
				+
			
 
				+/*
				+ * Push this vCPU's per-vCPU upcall vector (env.xen_vcpu_callback_vector)
				+ * to the kernel. Returns the ioctl result.
				+ *
				+ * The request is a struct kvm_xen_vcpu_attr issued on the vCPU, so it must
				+ * use KVM_XEN_VCPU_SET_ATTR; KVM_XEN_HVM_SET_ATTR is the VM-scope ioctl and
				+ * takes a different structure.
				+ */
				+static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
				+{
				+    uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
				+    struct kvm_xen_vcpu_attr xva;
				+
				+    xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
				+    xva.u.vector = vector;
				+
				+    trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
				+
				+    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
				+}
			
 
				+
			
 
				+/*
				+ * run_on_cpu worker: record the upcall vector carried in @data.host_int for
				+ * this vCPU and, when the kernel supports EVTCHN_SEND, forward it to KVM.
				+ */
				+static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+
				+    env->xen_vcpu_callback_vector = data.host_int;
				+
				+    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
				+        return;
				+    }
				+
				+    kvm_xen_set_vcpu_callback_vector(cs);
				+}
			
 
				+
			
 
				+/*
				+ * Point this vCPU's vcpu_info at guest physical address @gpa, both in the
				+ * kernel and in QEMU's cached host mapping (env->xen_vcpu_info_hva and
				+ * env->xen_vcpu_info_mr).
				+ *
				+ * Passing INVALID_GPA, or any failure, leaves the cached mapping cleared.
				+ * Returns 0 on success, the ioctl error, or -EINVAL when @gpa is not backed
				+ * by a RAM region large enough for a struct vcpu_info.
				+ */
				+static int set_vcpu_info(CPUState *cs, uint64_t gpa)
				+{
				+    X86CPU *cpu = X86_CPU(cs);
				+    CPUX86State *env = &cpu->env;
				+    MemoryRegionSection mrs = { .mr = NULL };
				+    void *vcpu_info_hva = NULL;
				+    int ret;
				+
				+    ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
				+    /* On error or when unsetting, skip the lookup and just drop the cache. */
				+    if (ret || gpa == INVALID_GPA) {
				+        goto out;
				+    }
				+
				+    mrs = memory_region_find(get_system_memory(), gpa,
				+                             sizeof(struct vcpu_info));
				+    /* Only RAM-backed sections covering the whole structure are usable. */
				+    if (mrs.mr && mrs.mr->ram_block &&
				+        !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
				+        vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
				+                                         mrs.offset_within_region);
				+    }
				+    if (!vcpu_info_hva) {
				+        /* Release the region found above; it will not be cached. */
				+        if (mrs.mr) {
				+            memory_region_unref(mrs.mr);
				+            mrs.mr = NULL;
				+        }
				+        ret = -EINVAL;
				+    }
				+
				+ out:
				+    /* Drop the reference held for the previous mapping, then install the new. */
				+    if (env->xen_vcpu_info_mr) {
				+        memory_region_unref(env->xen_vcpu_info_mr);
				+    }
				+    env->xen_vcpu_info_hva = vcpu_info_hva;
				+    env->xen_vcpu_info_mr = mrs.mr;
				+    return ret;
				+}
			
 
				+
			
 
				+/*
				+ * run_on_cpu worker: remember @data.host_ulong as the default vcpu_info
				+ * address and apply it, unless the guest explicitly registered a vcpu_info,
				+ * in which case the explicit one stays in effect.
				+ */
				+static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+
				+    env->xen_vcpu_info_default_gpa = data.host_ulong;
				+
				+    if (env->xen_vcpu_info_gpa != INVALID_GPA) {
				+        /* An explicit registration takes precedence over the default. */
				+        return;
				+    }
				+
				+    set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
				+}
			
 
				+
			
 
				+/*
				+ * run_on_cpu worker: record @data.host_ulong as the explicitly registered
				+ * vcpu_info address for this vCPU and apply it.
				+ */
				+static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+
				+    env->xen_vcpu_info_gpa = data.host_ulong;
				+    set_vcpu_info(cs, env->xen_vcpu_info_gpa);
				+}
			
 
				+
			
 
				+/*
				+ * Return the cached host pointer to the vcpu_info of vCPU @vcpu_id, or NULL
				+ * if there is no such vCPU (the cached pointer itself may also be NULL).
				+ */
				+void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
				+{
				+    CPUState *cs = qemu_get_cpu(vcpu_id);
				+
				+    return cs ? X86_CPU(cs)->env.xen_vcpu_info_hva : NULL;
				+}
			
 
				+
			
 
				+/*
				+ * If the guest has cleared evtchn_upcall_pending in this vCPU's vcpu_info,
				+ * clear the "callback asserted" flag and lower the callback GSI level.
				+ * Does nothing when no vcpu_info is mapped.
				+ */
				+void kvm_xen_maybe_deassert_callback(CPUState *cs)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+    struct vcpu_info *info = env->xen_vcpu_info_hva;
				+
				+    /*
				+     * Unlocked fast-path check: this is called often, so the iothread lock
				+     * is only taken once the flag looks clear.
				+     */
				+    if (!info || info->evtchn_upcall_pending) {
				+        return;
				+    }
				+
				+    qemu_mutex_lock_iothread();
				+    /* Re-check under the lock; it may have been raised again meanwhile. */
				+    if (!info->evtchn_upcall_pending) {
				+        env->xen_callback_asserted = false;
				+        xen_evtchn_set_callback_level(0);
				+    }
				+    qemu_mutex_unlock_iothread();
				+}
			
 
				+
			
 
				+/* Flag on vCPU 0 (if it exists) that the callback is currently asserted. */
				+void kvm_xen_set_callback_asserted(void)
				+{
				+    CPUState *vcpu0 = qemu_get_cpu(0);
				+
				+    if (!vcpu0) {
				+        return;
				+    }
				+
				+    X86_CPU(vcpu0)->env.xen_callback_asserted = true;
				+}
			
 
				+
			
 
				+/*
				+ * Deliver the event channel upcall to vCPU @vcpu_id.
				+ *
				+ * If the vCPU has a per-vCPU callback vector it is sent as an MSI and @type
				+ * is ignored. Otherwise @type (an HVM_PARAM_CALLBACK_TYPE_* value) selects
				+ * the delivery method: kick the vCPU for the vector type, or raise the
				+ * callback level (vCPU 0 only) for GSI / PCI INTx. An unknown vCPU or any
				+ * other @type is silently ignored.
				+ */
				+void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
				+{
				+    CPUState *cs = qemu_get_cpu(vcpu_id);
				+    uint8_t vector;
				+
				+    if (!cs) {
				+        return;
				+    }
				+
				+    vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
				+    if (vector) {
				+        /*
				+         * The per-vCPU callback vector injected via lapic. Just
				+         * deliver it as an MSI. The destination APIC ID belongs in the
				+         * destination-ID field of the MSI address, not in its low bits.
				+         */
				+        MSIMessage msg = {
				+            .address = APIC_DEFAULT_ADDRESS |
				+                       (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
				+            .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
				+        };
				+        kvm_irqchip_send_msi(kvm_state, msg);
				+        return;
				+    }
				+
				+    switch (type) {
				+    case HVM_PARAM_CALLBACK_TYPE_VECTOR:
				+        /*
				+         * If the evtchn_upcall_pending field in the vcpu_info is set, then
				+         * KVM will automatically deliver the vector on entering the vCPU
				+         * so all we have to do is kick it out.
				+         */
				+        qemu_cpu_kick(cs);
				+        break;
				+
				+    case HVM_PARAM_CALLBACK_TYPE_GSI:
				+    case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
				+        /* Level-triggered delivery is only raised on behalf of vCPU 0. */
				+        if (vcpu_id == 0) {
				+            xen_evtchn_set_callback_level(1);
				+        }
				+        break;
				+    }
				+}
			
 
				+
			
 
				+/*
				+ * Program the kernel's Xen timer for this vCPU from the current state: the
				+ * VIRQ_TIMER event channel port and the single-shot expiry time. Returns the
				+ * ioctl result.
				+ */
				+static int kvm_xen_set_vcpu_timer(CPUState *cs)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+    struct kvm_xen_vcpu_attr timer_attr = {
				+        .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
				+        .u.timer.port = env->xen_virq[VIRQ_TIMER],
				+        .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
				+        .u.timer.expires_ns = env->xen_singleshot_timer_ns,
				+    };
				+
				+    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &timer_attr);
				+}
			
 
				+
			
 
				+/*
				+ * run_on_cpu worker: re-program the kernel timer for this vCPU. @data is
				+ * unused and the result of kvm_xen_set_vcpu_timer() is discarded.
				+ */
				+static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
				+{
				+    kvm_xen_set_vcpu_timer(cs);
				+}
			
 
				+
			
 
				+/*
				+ * Bind (@port != 0) or unbind (@port == 0) the event channel port used for
				+ * @virq on vCPU @vcpu_id.
				+ *
				+ * Returns 0 on success, -ENOENT for an unknown vCPU, -EINVAL for an
				+ * out-of-range @virq, or -EEXIST when binding a VIRQ that already has a
				+ * port. A change to VIRQ_TIMER is also pushed to the kernel asynchronously
				+ * when EVTCHN_SEND is available.
				+ */
				+int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
				+{
				+    CPUState *cs = qemu_get_cpu(vcpu_id);
				+    CPUX86State *env;
				+
				+    if (!cs) {
				+        return -ENOENT;
				+    }
				+
				+    /* cpu.h carries its own copy of the constant; keep the two in sync. */
				+    qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
				+
				+    if (virq >= NR_VIRQS) {
				+        return -EINVAL;
				+    }
				+
				+    env = &X86_CPU(cs)->env;
				+    if (port && env->xen_virq[virq]) {
				+        return -EEXIST;
				+    }
				+    env->xen_virq[virq] = port;
				+
				+    if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
				+        async_run_on_cpu(cs, do_set_vcpu_timer_virq,
				+                         RUN_ON_CPU_HOST_INT(port));
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * run_on_cpu worker: record @data.host_ulong as this vCPU's time-info
				+ * address and hand it to the kernel. The ioctl result is not checked.
				+ */
				+static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+
				+    env->xen_vcpu_time_info_gpa = data.host_ulong;
				+    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
				+                          env->xen_vcpu_time_info_gpa);
				+}
			
 
				+
			
 
				+/*
				+ * run_on_cpu worker: record @data.host_ulong as this vCPU's runstate
				+ * address and hand it to the kernel. The ioctl result is not checked.
				+ */
				+static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+
				+    env->xen_vcpu_runstate_gpa = data.host_ulong;
				+    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
				+                          env->xen_vcpu_runstate_gpa);
				+}
			
 
				+
			
 
				+/*
				+ * run_on_cpu worker: return this vCPU's Xen state to its unregistered
				+ * defaults (no vcpu_info, time info or runstate area, no upcall vector, no
				+ * timer, no VIRQ bindings) and propagate the cleared state to the kernel.
				+ * @data is unused.
				+ */
				+static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+
				+    /* Forget everything the guest registered for this vCPU. */
				+    env->xen_vcpu_info_gpa = INVALID_GPA;
				+    env->xen_vcpu_info_default_gpa = INVALID_GPA;
				+    env->xen_vcpu_time_info_gpa = INVALID_GPA;
				+    env->xen_vcpu_runstate_gpa = INVALID_GPA;
				+    env->xen_vcpu_callback_vector = 0;
				+    env->xen_singleshot_timer_ns = 0;
				+    memset(env->xen_virq, 0, sizeof(env->xen_virq));
				+
				+    /* Then mirror the now-empty state into the kernel. */
				+    set_vcpu_info(cs, INVALID_GPA);
				+    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
				+                          INVALID_GPA);
				+    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
				+                          INVALID_GPA);
				+
				+    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
				+        return;
				+    }
				+    kvm_xen_set_vcpu_callback_vector(cs);
				+    kvm_xen_set_vcpu_timer(cs);
				+}
			
 
				+
			
 
				+/*
				+ * Map the Xen shared info page at guest frame @gfn and queue an update of
				+ * the default vcpu_info address on each existing vCPU among the first
				+ * XEN_LEGACY_MAX_VCPUS. The default addresses are consecutive
				+ * vcpu_info_t-sized slots starting at the base of the page.
				+ *
				+ * Runs with the iothread lock held for its whole duration. Returns 0 on
				+ * success or the error from xen_overlay_map_shinfo_page().
				+ */
				+static int xen_set_shared_info(uint64_t gfn)
				+{
				+    uint64_t slot_gpa = gfn << TARGET_PAGE_BITS;
				+    int rc, vcpu;
				+
				+    QEMU_IOTHREAD_LOCK_GUARD();
				+
				+    /*
				+     * The xen_overlay device informs KVM as well; it has to do so on
				+     * migration load anyway, and pretending this is not KVM-specific
				+     * would take a lot of contortions.
				+     */
				+    rc = xen_overlay_map_shinfo_page(slot_gpa);
				+    if (rc) {
				+        return rc;
				+    }
				+
				+    trace_kvm_xen_set_shared_info(gfn);
				+
				+    for (vcpu = 0; vcpu < XEN_LEGACY_MAX_VCPUS;
				+         vcpu++, slot_gpa += sizeof(vcpu_info_t)) {
				+        CPUState *cs = qemu_get_cpu(vcpu);
				+
				+        if (!cs) {
				+            continue;
				+        }
				+        async_run_on_cpu(cs, do_set_vcpu_info_default_gpa,
				+                         RUN_ON_CPU_HOST_ULONG(slot_gpa));
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Perform one add-to-physmap operation: map item @idx of map space @space
				+ * at guest frame @gfn.
				+ *
				+ * Only the shared info page (index 0) and grant table frames are
				+ * implemented. Returns 0 or a negative errno: -ENOTSUP for the gmfn spaces,
				+ * -EPERM for foreign/MMIO spaces, -EINVAL for anything else.
				+ */
				+static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
				+{
				+    switch (space) {
				+    case XENMAPSPACE_shared_info:
				+        /* There is exactly one shared info page. */
				+        return idx ? -EINVAL : xen_set_shared_info(gfn);
				+
				+    case XENMAPSPACE_grant_table:
				+        return xen_gnttab_map_page(idx, gfn);
				+
				+    case XENMAPSPACE_gmfn:
				+    case XENMAPSPACE_gmfn_range:
				+        return -ENOTSUP;
				+
				+    case XENMAPSPACE_gmfn_foreign:
				+    case XENMAPSPACE_dev_mmio:
				+        return -EPERM;
				+
				+    default:
				+        break;
				+    }
				+
				+    return -EINVAL;
				+}
			
 
				+
			
 
				+/*
				+ * XENMEM_add_to_physmap: fetch the request from guest address @arg, using
				+ * the compat layout for 32-bit callers, and carry it out.
				+ *
				+ * Returns -EFAULT if the request cannot be read, -ESRCH if it targets a
				+ * domain other than the caller's own, otherwise the result of
				+ * add_to_physmap_one().
				+ */
				+static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                             uint64_t arg)
				+{
				+    CPUState *cs = CPU(cpu);
				+    struct xen_add_to_physmap req;
				+
				+    if (!hypercall_compat32(exit->u.hcall.longmode)) {
				+        if (kvm_copy_from_gva(cs, arg, &req, sizeof(req))) {
				+            return -EFAULT;
				+        }
				+    } else {
				+        struct compat_xen_add_to_physmap req32;
				+
				+        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
				+        if (kvm_copy_from_gva(cs, arg, &req32, sizeof(req32))) {
				+            return -EFAULT;
				+        }
				+
				+        /* Widen the 32-bit request into the native structure. */
				+        req.domid = req32.domid;
				+        req.size = req32.size;
				+        req.space = req32.space;
				+        req.idx = req32.idx;
				+        req.gpfn = req32.gpfn;
				+    }
				+
				+    if (!(req.domid == DOMID_SELF || req.domid == xen_domid)) {
				+        return -ESRCH;
				+    }
				+
				+    return add_to_physmap_one(req.space, req.idx, req.gpfn);
				+}
			
 
				+
			
 
				+/*
				+ * XENMEM_add_to_physmap_batch: read the batch descriptor from guest address
				+ * @arg (compat layout for 32-bit callers), then for each of its 'size'
				+ * entries read an index and a guest frame number from the guest arrays,
				+ * perform the mapping and write the per-entry result to the errs array.
				+ *
				+ * Returns 0 once all entries were processed (individual failures are only
				+ * reported through errs), -EFAULT on any guest memory access failure,
				+ * -ESRCH for a foreign domain and -EINVAL for XENMAPSPACE_gmfn_range.
				+ */
				+static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                   uint64_t arg)
				+{
				+    struct xen_add_to_physmap_batch xatpb;
				+    unsigned long idxs_gva, gpfns_gva, errs_gva;
				+    CPUState *cs = CPU(cpu);
				+    size_t op_sz;
				+
				+    if (hypercall_compat32(exit->u.hcall.longmode)) {
				+        struct compat_xen_add_to_physmap_batch xatpb32;
				+
				+        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
				+        if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
				+            return -EFAULT;
				+        }
				+        xatpb.domid = xatpb32.domid;
				+        xatpb.space = xatpb32.space;
				+        xatpb.size = xatpb32.size;
				+
				+        /* Compat handles hold 32-bit guest pointers to 32-bit elements. */
				+        idxs_gva = xatpb32.idxs.c;
				+        gpfns_gva = xatpb32.gpfns.c;
				+        errs_gva = xatpb32.errs.c;
				+        op_sz = sizeof(uint32_t);
				+    } else {
				+        if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
				+            return -EFAULT;
				+        }
				+        /* Native handles: elements are unsigned long sized. */
				+        op_sz = sizeof(unsigned long);
				+        idxs_gva = (unsigned long)xatpb.idxs.p;
				+        gpfns_gva = (unsigned long)xatpb.gpfns.p;
				+        errs_gva = (unsigned long)xatpb.errs.p;
				+    }
				+
				+    if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
				+        return -ESRCH;
				+    }
				+
				+    /* Explicitly invalid for the batch op. Not that we implement it anyway. */
				+    if (xatpb.space == XENMAPSPACE_gmfn_range) {
				+        return -EINVAL;
				+    }
				+
				+    /* xatpb.size is consumed as the loop counter. */
				+    while (xatpb.size--) {
				+        /* Zero-initialised so a short compat copy leaves the rest clear. */
				+        unsigned long idx = 0;
				+        unsigned long gpfn = 0;
				+        int err;
				+
				+        /* For 32-bit compat this only copies the low 32 bits of each */
				+        if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
				+            kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
				+            return -EFAULT;
				+        }
				+        idxs_gva += op_sz;
				+        gpfns_gva += op_sz;
				+
				+        err = add_to_physmap_one(xatpb.space, idx, gpfn);
				+
				+        /* errs elements are plain int in both the native and compat ABI. */
				+        if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
				+            return -EFAULT;
				+        }
				+        errs_gva += sizeof(err);
				+    }
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Dispatch a memory_op hypercall. Handles XENMEM_add_to_physmap and
				+ * XENMEM_add_to_physmap_batch, storing their result in @exit and returning
				+ * true; returns false for every other @cmd so the caller can treat it as
				+ * unhandled.
				+ */
				+static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                   int cmd, uint64_t arg)
				+{
				+    int rc;
				+
				+    if (cmd == XENMEM_add_to_physmap) {
				+        rc = do_add_to_physmap(exit, cpu, arg);
				+    } else if (cmd == XENMEM_add_to_physmap_batch) {
				+        rc = do_add_to_physmap_batch(exit, cpu, arg);
				+    } else {
				+        return false;
				+    }
				+
				+    exit->u.hcall.result = rc;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * HVMOP_set_param: read a struct xen_hvm_param from guest address @arg and
				+ * apply it. Only HVM_PARAM_CALLBACK_IRQ is handled.
				+ *
				+ * Returns false for any other parameter index (leaving @exit untouched) so
				+ * the caller can treat the hypercall as unhandled; otherwise stores
				+ * 0 / -EFAULT / -ESRCH / the setter's result in @exit and returns true.
				+ */
				+static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                             uint64_t arg)
				+{
				+    CPUState *cs = CPU(cpu);
				+    struct xen_hvm_param param;
				+    int rc = 0;
				+
				+    /* Same layout for 32-bit and 64-bit guests, so no compat handling. */
				+    qemu_build_assert(sizeof(param) == 16);
				+
				+    if (kvm_copy_from_gva(cs, arg, &param, sizeof(param))) {
				+        rc = -EFAULT;
				+    } else if (param.domid != DOMID_SELF && param.domid != xen_domid) {
				+        rc = -ESRCH;
				+    } else {
				+        switch (param.index) {
				+        case HVM_PARAM_CALLBACK_IRQ:
				+            qemu_mutex_lock_iothread();
				+            rc = xen_evtchn_set_callback_param(param.value);
				+            qemu_mutex_unlock_iothread();
				+            /* Record the mode the guest made this hypercall in. */
				+            xen_set_long_mode(exit->u.hcall.longmode);
				+            break;
				+        default:
				+            return false;
				+        }
				+    }
				+
				+    exit->u.hcall.result = rc;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * HVMOP_get_param: read a struct xen_hvm_param from guest address @arg,
				+ * fill in its value and write it back. Only the xenstore PFN and xenstore
				+ * event channel parameters are handled.
				+ *
				+ * Returns false for any other parameter index (leaving @exit untouched) so
				+ * the caller can treat the hypercall as unhandled; otherwise stores
				+ * 0 / -EFAULT / -ESRCH in @exit and returns true.
				+ */
				+static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                             uint64_t arg)
				+{
				+    CPUState *cs = CPU(cpu);
				+    struct xen_hvm_param param;
				+    int rc = 0;
				+
				+    /* Same layout for 32-bit and 64-bit guests, so no compat handling. */
				+    qemu_build_assert(sizeof(param) == 16);
				+
				+    if (kvm_copy_from_gva(cs, arg, &param, sizeof(param))) {
				+        rc = -EFAULT;
				+    } else if (param.domid != DOMID_SELF && param.domid != xen_domid) {
				+        rc = -ESRCH;
				+    } else {
				+        switch (param.index) {
				+        case HVM_PARAM_STORE_PFN:
				+            param.value = XEN_SPECIAL_PFN(XENSTORE);
				+            break;
				+        case HVM_PARAM_STORE_EVTCHN:
				+            param.value = xen_xenstore_get_port();
				+            break;
				+        default:
				+            return false;
				+        }
				+
				+        if (kvm_copy_to_gva(cs, arg, &param, sizeof(param))) {
				+            rc = -EFAULT;
				+        }
				+    }
				+
				+    exit->u.hcall.result = rc;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * HVMOP_set_evtchn_upcall_vector: read the request from guest address @arg
				+ * and queue the new per-vCPU upcall vector on the target vCPU.
				+ *
				+ * Returns 0 once the update is queued, -EFAULT if the request cannot be
				+ * read, or -EINVAL for a vector below 0x10 or an unknown target vCPU.
				+ * @exit is not used.
				+ */
				+static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
				+                                              X86CPU *cpu, uint64_t arg)
				+{
				+    struct xen_hvm_evtchn_upcall_vector req;
				+    CPUState *dest;
				+
				+    /* Same layout for 32-bit and 64-bit guests, so no compat handling. */
				+    qemu_build_assert(sizeof(req) == 8);
				+
				+    if (kvm_copy_from_gva(CPU(cpu), arg, &req, sizeof(req))) {
				+        return -EFAULT;
				+    }
				+
				+    if (req.vector < 0x10) {
				+        return -EINVAL;
				+    }
				+
				+    dest = qemu_get_cpu(req.vcpu);
				+    if (dest == NULL) {
				+        return -EINVAL;
				+    }
				+
				+    async_run_on_cpu(dest, do_set_vcpu_callback_vector,
				+                     RUN_ON_CPU_HOST_INT(req.vector));
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Dispatch a HYPERVISOR_hvm_op hypercall.
				+ *
				+ * @cmd is the HVMOP_* sub-operation and @arg the guest virtual address
				+ * of its argument structure. Returns true when the call was handled
				+ * (its result is then in exit->u.hcall.result) and false when the
				+ * sub-operation is not implemented here.
				+ */
				+static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                 int cmd, uint64_t arg)
				+{
				+    int ret = -ENOSYS;
				+    switch (cmd) {
				+    case HVMOP_set_evtchn_upcall_vector:
				+        /*
				+         * Pass the argument pointer, like the other sub-operations do.
				+         * Using exit->u.hcall.params[0] here would hand the helper a
				+         * value other than @arg as the guest address to copy from.
				+         */
				+        ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg);
				+        break;
				+
				+    case HVMOP_pagetable_dying:
				+        /* Recognised, but explicitly answered with -ENOSYS. */
				+        ret = -ENOSYS;
				+        break;
				+
				+    case HVMOP_set_param:
				+        return handle_set_param(exit, cpu, arg);
				+
				+    case HVMOP_get_param:
				+        return handle_get_param(exit, cpu, arg);
				+
				+    default:
				+        return false;
				+    }
				+
				+    exit->u.hcall.result = ret;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * VCPUOP_register_vcpu_info: read a struct vcpu_register_vcpu_info from
				+ * guest address @arg (translated in the context of @cs) and queue
				+ * do_set_vcpu_info_gpa() on @target with the resulting guest physical
				+ * address.
				+ *
				+ * Returns 0 once queued, -ENOENT if @target is NULL, -EFAULT if the
				+ * argument cannot be read, or -EINVAL if a struct vcpu_info placed at
				+ * the requested offset would not fit within one page.
				+ */
				+static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
				+                                     uint64_t arg)
				+{
				+    struct vcpu_register_vcpu_info req;
				+    uint64_t info_gpa;
				+
				+    /* Same layout for 32-bit and 64-bit callers, so no compat variant. */
				+    qemu_build_assert(sizeof(req) == 16);
				+    qemu_build_assert(sizeof(struct vcpu_info) == 64);
				+
				+    if (target == NULL) {
				+        return -ENOENT;
				+    }
				+
				+    if (kvm_copy_from_gva(cs, arg, &req, sizeof(req))) {
				+        return -EFAULT;
				+    }
				+
				+    if (req.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
				+        return -EINVAL;
				+    }
				+
				+    /* Frame number to byte address, then add the offset within the page. */
				+    info_gpa = req.mfn << TARGET_PAGE_BITS;
				+    info_gpa += req.offset;
				+
				+    async_run_on_cpu(target, do_set_vcpu_info_gpa,
				+                     RUN_ON_CPU_HOST_ULONG(info_gpa));
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * VCPUOP_register_vcpu_time_memory_area: read the registration struct
				+ * from guest address @arg, translate the guest virtual address it
				+ * contains and queue do_set_vcpu_time_info_gpa() on @target.
				+ *
				+ * Returns 0 once queued, -ENOENT if @target is NULL, or -EFAULT if the
				+ * argument cannot be read, the address does not translate, or the
				+ * translated range is shorter than a struct vcpu_time_info.
				+ */
				+static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
				+                                          uint64_t arg)
				+{
				+    struct vcpu_register_time_memory_area req;
				+    uint64_t area_gpa;
				+    size_t avail;
				+
				+    /* Same layout for 32-bit and 64-bit callers, so no compat variant. */
				+    qemu_build_assert(sizeof(req) == 8);
				+    qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
				+
				+    if (target == NULL) {
				+        return -ENOENT;
				+    }
				+
				+    if (kvm_copy_from_gva(cs, arg, &req, sizeof(req))) {
				+        return -EFAULT;
				+    }
				+
				+    /*
				+     * Real Xen keeps the GVA and walks the guest page tables on every
				+     * update. Linux/KVM instead takes a GPA, assuming guests only
				+     * register *global* (kernel virtual) addresses. Should Linux start
				+     * redoing the GVA->GPA translation each time, it will expose a new
				+     * vCPU attribute for that and this code will switch to it.
				+     */
				+    if (!kvm_gva_to_gpa(cs, req.addr.p, &area_gpa, &avail, false)) {
				+        return -EFAULT;
				+    }
				+    if (avail < sizeof(struct vcpu_time_info)) {
				+        return -EFAULT;
				+    }
				+
				+    async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
				+                     RUN_ON_CPU_HOST_ULONG(area_gpa));
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * VCPUOP_register_runstate_memory_area: read the registration struct
				+ * from guest address @arg, translate the guest virtual address it
				+ * contains and queue do_set_vcpu_runstate_gpa() on @target.
				+ *
				+ * Returns 0 once queued, -ENOENT if @target is NULL, or -EFAULT if the
				+ * argument cannot be read or the address does not translate.
				+ */
				+static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
				+                                         uint64_t arg)
				+{
				+    struct vcpu_register_runstate_memory_area req;
				+    uint64_t area_gpa;
				+    size_t avail;
				+
				+    /* Same layout for 32-bit and 64-bit callers, so no compat variant. */
				+    qemu_build_assert(sizeof(req) == 8);
				+    /*
				+     * The size of the runstate area itself is not fixed, but Linux
				+     * copes, so the translated length is deliberately not checked.
				+     */
				+
				+    if (target == NULL) {
				+        return -ENOENT;
				+    }
				+
				+    if (kvm_copy_from_gva(cs, arg, &req, sizeof(req))) {
				+        return -EFAULT;
				+    }
				+
				+    /* Like vcpu_time_info: Xen keeps the GVA, KVM wants a GPA. */
				+    if (!kvm_gva_to_gpa(cs, req.addr.p, &area_gpa, &avail, false)) {
				+        return -EFAULT;
				+    }
				+
				+    async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
				+                     RUN_ON_CPU_HOST_ULONG(area_gpa));
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Return the clock value reported by the KVM_GET_CLOCK VM ioctl.
				+ *
				+ * A failing ioctl is treated as fatal: the error is printed to stderr
				+ * and the process aborts.
				+ */
				+static uint64_t kvm_get_current_ns(void)
				+{
				+    struct kvm_clock_data data;
				+    int ret;
				+
				+    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
				+    if (ret < 0) {
				+        /* ret is a negative error code here; strerror() wants it positive. */
				+        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(-ret));
				+        abort();
				+    }
				+
				+    return data.clock;
				+}
			
 
				+
			
 
				+/*
				+ * Timer callback for a vCPU's single-shot timer; @opaque is the
				+ * CPUState. Signals the event channel port bound to VIRQ_TIMER, if
				+ * any, then clears the recorded expiry time under xen_timers_lock.
				+ */
				+static void xen_vcpu_singleshot_timer_event(void *opaque)
				+{
				+    CPUState *cs = opaque;
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+    uint16_t timer_port = env->xen_virq[VIRQ_TIMER];
				+
				+    /* A port of zero means nothing is bound to VIRQ_TIMER. */
				+    if (likely(timer_port != 0)) {
				+        xen_evtchn_set_port(timer_port);
				+    }
				+
				+    qemu_mutex_lock(&env->xen_timers_lock);
				+    env->xen_singleshot_timer_ns = 0;
				+    qemu_mutex_unlock(&env->xen_timers_lock);
				+}
			
 
				+
			
 
				+/*
				+ * Timer callback for a vCPU's periodic timer; @opaque is the CPUState.
				+ * Signals the event channel port bound to VIRQ_TIMER, if any, then
				+ * re-arms the timer one period after the current QEMU_CLOCK_VIRTUAL
				+ * time, under xen_timers_lock.
				+ */
				+static void xen_vcpu_periodic_timer_event(void *opaque)
				+{
				+    CPUState *cs = opaque;
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+    uint16_t timer_port = env->xen_virq[VIRQ_TIMER];
				+    int64_t next_expiry;
				+
				+    /* A port of zero means nothing is bound to VIRQ_TIMER. */
				+    if (likely(timer_port != 0)) {
				+        xen_evtchn_set_port(timer_port);
				+    }
				+
				+    qemu_mutex_lock(&env->xen_timers_lock);
				+
				+    next_expiry = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
				+                  env->xen_periodic_timer_period;
				+    timer_mod_ns(env->xen_periodic_timer, next_expiry);
				+
				+    qemu_mutex_unlock(&env->xen_timers_lock);
				+}
			
 
				+
			
 
				+/*
				+ * (Re)start @target's periodic timer with a period of @period_ns.
				+ *
				+ * Any pending instance is deleted first; the timer is then armed one
				+ * period after the current QEMU_CLOCK_VIRTUAL time and the period is
				+ * recorded, both under xen_timers_lock. Always returns 0.
				+ */
				+static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
				+{
				+    CPUX86State *env = &X86_CPU(target)->env;
				+    int64_t first_expiry;
				+
				+    timer_del(env->xen_periodic_timer);
				+
				+    qemu_mutex_lock(&env->xen_timers_lock);
				+
				+    first_expiry = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + period_ns;
				+    timer_mod_ns(env->xen_periodic_timer, first_expiry);
				+    env->xen_periodic_timer_period = period_ns;
				+
				+    qemu_mutex_unlock(&env->xen_timers_lock);
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/* Convert milliseconds / microseconds to signed 64-bit nanoseconds. */
				+#define MILLISECS(_ms)  ((int64_t)((_ms) * 1000000ULL))
				+#define MICROSECS(_us)  ((int64_t)((_us) * 1000ULL))
				+/*
				+ * Largest positive signed 64-bit time value. The shift has to happen
				+ * on the unsigned value: casting ~0ull to a signed type first gives -1,
				+ * and shifting that right still leaves a negative number.
				+ */
				+#define STIME_MAX ((int64_t)((uint64_t)~0ull >> 1))
				+/* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
				+#define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
			
 
				+
			
 
				+/*
				+ * VCPUOP_set_periodic_timer: read a struct vcpu_set_periodic_timer from
				+ * guest address @arg and start @target's periodic timer with it.
				+ *
				+ * Returns -EFAULT if the argument cannot be read, -EINVAL if the
				+ * period is outside [MILLISECS(1), STIME_DELTA_MAX], otherwise the
				+ * result of do_set_periodic_timer().
				+ */
				+static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
				+                                     uint64_t arg)
				+{
				+    struct vcpu_set_periodic_timer req;
				+
				+    qemu_build_assert(sizeof(req) == 8);
				+
				+    if (kvm_copy_from_gva(cs, arg, &req, sizeof(req))) {
				+        return -EFAULT;
				+    }
				+
				+    if (!(req.period_ns >= MILLISECS(1) &&
				+          req.period_ns <= STIME_DELTA_MAX)) {
				+        return -EINVAL;
				+    }
				+
				+    return do_set_periodic_timer(target, req.period_ns);
				+}
			
 
				+
			
 
				+/*
				+ * VCPUOP_stop_periodic_timer: delete @target's periodic timer and
				+ * reset its recorded period to zero, under xen_timers_lock.
				+ * Always returns 0.
				+ */
				+static int vcpuop_stop_periodic_timer(CPUState *target)
				+{
				+    CPUX86State *env = &X86_CPU(target)->env;
				+
				+    qemu_mutex_lock(&env->xen_timers_lock);
				+    timer_del(env->xen_periodic_timer);
				+    env->xen_periodic_timer_period = 0;
				+    qemu_mutex_unlock(&env->xen_timers_lock);
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Arm @cs's single-shot timer for the absolute time @timeout_abs,
				+ * expressed on the clock returned by kvm_get_current_ns().
				+ *
				+ * @future:   fail with -ETIME if @timeout_abs is already in the past.
				+ * @linux_wa: apply Xen's "Linux workaround" for implausible timeouts
				+ *            (see the comment in the body).
				+ *
				+ * The distance to the deadline is applied to the current
				+ * QEMU_CLOCK_VIRTUAL time to program the QEMU timer. Returns 0 on
				+ * success.
				+ */
				+static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
				+                                   bool future, bool linux_wa)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+    int64_t now = kvm_get_current_ns();
				+    int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
				+    int64_t delta = timeout_abs - now;
				+    bool implausible;
				+
				+    if (future && timeout_abs < now) {
				+        return -ETIME;
				+    }
				+
				+    /*
				+     * Xen's do_set_timer_op() carries a 'Linux workaround': a negative
				+     * absolute timeout (from integer overflow), or one more than about
				+     * 13 days ahead (2^50ns, from jiffies overflow), is replaced by a
				+     * timeout 100ms from now. That is deliberately not *too* soon: if
				+     * the guest really meant a long timeout, waking it early over and
				+     * over would only burn CPU time.
				+     */
				+    implausible = (int64_t)timeout_abs < 0 ||
				+                  (delta > 0 && (uint32_t)(delta >> 50) != 0);
				+    if (linux_wa && unlikely(implausible)) {
				+        delta = (100 * SCALE_MS);
				+    }
				+
				+    qemu_mutex_lock(&env->xen_timers_lock);
				+
				+    timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
				+    env->xen_singleshot_timer_ns = now + delta;
				+
				+    qemu_mutex_unlock(&env->xen_timers_lock);
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * VCPUOP_set_singleshot_timer: read a struct vcpu_set_singleshot_timer
				+ * from guest address @arg and arm @cs's single-shot timer from it,
				+ * honouring the VCPU_SSHOTTMR_future flag. The Linux workaround is not
				+ * applied on this path.
				+ *
				+ * Returns -EFAULT if the argument cannot be read, otherwise the result
				+ * of do_set_singleshot_timer().
				+ */
				+static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
				+{
				+    struct vcpu_set_singleshot_timer req = { 0 };
				+    bool must_be_future;
				+
				+    /*
				+     * The struct is a uint64_t followed by a uint32_t: 12 bytes for a
				+     * 32-bit guest, padded to 16 for a 64-bit one. The fields that are
				+     * used sit at the same offsets either way; only four trailing
				+     * padding bytes differ. Strict Xen compatibility would mean
				+     * copying all 16 bytes from 64-bit guests and failing with -EFAULT
				+     * if the padding is unreadable, but that's daft. Copy only the 12
				+     * bytes that matter.
				+     */
				+    qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
				+    qemu_build_assert(sizeof(req) >= 12);
				+
				+    if (kvm_copy_from_gva(cs, arg, &req, 12)) {
				+        return -EFAULT;
				+    }
				+
				+    must_be_future = !!(req.flags & VCPU_SSHOTTMR_future);
				+
				+    return do_set_singleshot_timer(cs, req.timeout_abs_ns, must_be_future,
				+                                   false);
				+}
			
 
				+
			
 
				+/*
				+ * VCPUOP_stop_singleshot_timer: delete @cs's single-shot timer and
				+ * clear its recorded expiry time, under xen_timers_lock.
				+ * Always returns 0.
				+ */
				+static int vcpuop_stop_singleshot_timer(CPUState *cs)
				+{
				+    CPUX86State *vcpu_env = &X86_CPU(cs)->env;
				+
				+    qemu_mutex_lock(&vcpu_env->xen_timers_lock);
				+    timer_del(vcpu_env->xen_singleshot_timer);
				+    vcpu_env->xen_singleshot_timer_ns = 0;
				+    qemu_mutex_unlock(&vcpu_env->xen_timers_lock);
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * HYPERVISOR_set_timer_op: a @timeout of zero stops the calling vCPU's
				+ * single-shot timer; any other value arms it for that absolute time
				+ * with the Linux workaround enabled.
				+ *
				+ * Always returns true, with the outcome in exit->u.hcall.result.
				+ */
				+static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                       uint64_t timeout)
				+{
				+    CPUState *cs = CPU(cpu);
				+    int rc;
				+
				+    if (likely(timeout != 0)) {
				+        rc = do_set_singleshot_timer(cs, timeout, false, true);
				+    } else {
				+        rc = vcpuop_stop_singleshot_timer(cs);
				+    }
				+
				+    exit->u.hcall.result = rc;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * Dispatch a HYPERVISOR_vcpu_op hypercall.
				+ *
				+ * @cmd is the VCPUOP_* sub-operation, @vcpu_id the vCPU it applies to
				+ * and @arg the guest virtual address of its argument, if any.
				+ *
				+ * Returns false for sub-operations not implemented here. Otherwise
				+ * returns true with the outcome in exit->u.hcall.result: -ENOENT if
				+ * @vcpu_id names no vCPU, and -EINVAL if a single-shot timer operation
				+ * targets a vCPU other than the caller.
				+ */
				+static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                  int cmd, int vcpu_id, uint64_t arg)
				+{
				+    CPUState *cs = CPU(cpu);
				+    bool on_self = cs->cpu_index == vcpu_id;
				+    CPUState *dest = on_self ? cs : qemu_get_cpu(vcpu_id);
				+    int rc;
				+
				+    if (dest == NULL) {
				+        exit->u.hcall.result = -ENOENT;
				+        return true;
				+    }
				+
				+    switch (cmd) {
				+    case VCPUOP_register_runstate_memory_area:
				+        rc = vcpuop_register_runstate_info(cs, dest, arg);
				+        break;
				+
				+    case VCPUOP_register_vcpu_time_memory_area:
				+        rc = vcpuop_register_vcpu_time_info(cs, dest, arg);
				+        break;
				+
				+    case VCPUOP_register_vcpu_info:
				+        rc = vcpuop_register_vcpu_info(cs, dest, arg);
				+        break;
				+
				+    /* The single-shot timer may only be driven by the vCPU itself. */
				+    case VCPUOP_set_singleshot_timer:
				+        rc = on_self ? vcpuop_set_singleshot_timer(dest, arg) : -EINVAL;
				+        break;
				+
				+    case VCPUOP_stop_singleshot_timer:
				+        rc = on_self ? vcpuop_stop_singleshot_timer(dest) : -EINVAL;
				+        break;
				+
				+    case VCPUOP_set_periodic_timer:
				+        rc = vcpuop_set_periodic_timer(cs, dest, arg);
				+        break;
				+
				+    case VCPUOP_stop_periodic_timer:
				+        rc = vcpuop_stop_periodic_timer(dest);
				+        break;
				+
				+    default:
				+        return false;
				+    }
				+
				+    exit->u.hcall.result = rc;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * Dispatch a HYPERVISOR_event_channel_op hypercall.
				+ *
				+ * @cmd is the EVTCHNOP_* sub-operation and @arg the guest virtual
				+ * address of its argument structure. Each handled sub-operation copies
				+ * its structure in from the guest and calls the matching xen_evtchn_*
				+ * helper; those that report results also copy the structure back when
				+ * the helper succeeded. A failed copy in either direction gives
				+ * -EFAULT.
				+ *
				+ * Returns false for sub-operations not listed here, otherwise true
				+ * with the outcome in exit->u.hcall.result.
				+ */
				+static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                    int cmd, uint64_t arg)
				+{
				+    CPUState *vcpu = CPU(cpu);
				+    int rc = -ENOSYS;
				+
				+    switch (cmd) {
				+    case EVTCHNOP_init_control:
				+    case EVTCHNOP_expand_array:
				+    case EVTCHNOP_set_priority:
				+        /* FIFO event channels are not supported yet: stay at -ENOSYS. */
				+        break;
				+
				+    case EVTCHNOP_status: {
				+        struct evtchn_status req;
				+
				+        qemu_build_assert(sizeof(req) == 24);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_status_op(&req);
				+            if (!rc && kvm_copy_to_gva(vcpu, arg, &req, sizeof(req))) {
				+                rc = -EFAULT;
				+            }
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_close: {
				+        struct evtchn_close req;
				+
				+        qemu_build_assert(sizeof(req) == 4);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_close_op(&req);
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_unmask: {
				+        struct evtchn_unmask req;
				+
				+        qemu_build_assert(sizeof(req) == 4);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_unmask_op(&req);
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_bind_virq: {
				+        struct evtchn_bind_virq req;
				+
				+        qemu_build_assert(sizeof(req) == 12);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_bind_virq_op(&req);
				+            if (!rc && kvm_copy_to_gva(vcpu, arg, &req, sizeof(req))) {
				+                rc = -EFAULT;
				+            }
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_bind_pirq: {
				+        struct evtchn_bind_pirq req;
				+
				+        qemu_build_assert(sizeof(req) == 12);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_bind_pirq_op(&req);
				+            if (!rc && kvm_copy_to_gva(vcpu, arg, &req, sizeof(req))) {
				+                rc = -EFAULT;
				+            }
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_bind_ipi: {
				+        struct evtchn_bind_ipi req;
				+
				+        qemu_build_assert(sizeof(req) == 8);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_bind_ipi_op(&req);
				+            if (!rc && kvm_copy_to_gva(vcpu, arg, &req, sizeof(req))) {
				+                rc = -EFAULT;
				+            }
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_send: {
				+        struct evtchn_send req;
				+
				+        qemu_build_assert(sizeof(req) == 4);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_send_op(&req);
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_alloc_unbound: {
				+        struct evtchn_alloc_unbound req;
				+
				+        qemu_build_assert(sizeof(req) == 8);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_alloc_unbound_op(&req);
				+            if (!rc && kvm_copy_to_gva(vcpu, arg, &req, sizeof(req))) {
				+                rc = -EFAULT;
				+            }
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_bind_interdomain: {
				+        struct evtchn_bind_interdomain req;
				+
				+        qemu_build_assert(sizeof(req) == 12);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_bind_interdomain_op(&req);
				+            if (!rc && kvm_copy_to_gva(vcpu, arg, &req, sizeof(req))) {
				+                rc = -EFAULT;
				+            }
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_bind_vcpu: {
				+        struct evtchn_bind_vcpu req;
				+
				+        qemu_build_assert(sizeof(req) == 8);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_bind_vcpu_op(&req);
				+        }
				+        break;
				+    }
				+    case EVTCHNOP_reset: {
				+        struct evtchn_reset req;
				+
				+        qemu_build_assert(sizeof(req) == 2);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_evtchn_reset_op(&req);
				+        }
				+        break;
				+    }
				+    default:
				+        return false;
				+    }
				+
				+    exit->u.hcall.result = rc;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * Perform a Xen soft reset. Must be called with the iothread mutex
				+ * held (asserted).
				+ *
				+ * In order: resets the event channels, clears the callback parameter,
				+ * queues do_vcpu_soft_reset() on every vCPU, unmaps the shared info
				+ * page and resets xenstore. Stops at the first failing step and
				+ * returns its error; returns 0 if every step succeeded.
				+ */
				+int kvm_xen_soft_reset(void)
				+{
				+    CPUState *cs;
				+    int rc;
				+
				+    assert(qemu_mutex_iothread_locked());
				+
				+    trace_kvm_xen_soft_reset();
				+
				+    rc = xen_evtchn_soft_reset();
				+    if (!rc) {
				+        /*
				+         * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ.
				+         * Strictly, it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0,
				+         * but Xen refuses to deliver to the timer interrupt and treats
				+         * that as 'disabled'.
				+         */
				+        rc = xen_evtchn_set_callback_param(0);
				+    }
				+    if (rc) {
				+        return rc;
				+    }
				+
				+    CPU_FOREACH(cs) {
				+        async_run_on_cpu(cs, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
				+    }
				+
				+    rc = xen_overlay_map_shinfo_page(INVALID_GFN);
				+    if (!rc) {
				+        rc = xen_xenstore_reset();
				+    }
				+
				+    return rc;
				+}
			
 
				+
			
 
				+/*
				+ * SCHEDOP_shutdown: read a struct sched_shutdown from guest address
				+ * @arg and act on its reason code.
				+ *
				+ * Returns -EFAULT if the argument cannot be read, -EINVAL for an
				+ * unknown reason, the result of kvm_xen_soft_reset() for
				+ * SHUTDOWN_soft_reset, and 0 for the other known reasons.
				+ */
				+static int schedop_shutdown(CPUState *cs, uint64_t arg)
				+{
				+    struct sched_shutdown req;
				+
				+    /* Same layout for 32-bit and 64-bit callers, so no compat variant. */
				+    qemu_build_assert(sizeof(req) == 4);
				+
				+    if (kvm_copy_from_gva(cs, arg, &req, sizeof(req))) {
				+        return -EFAULT;
				+    }
				+
				+    switch (req.reason) {
				+    case SHUTDOWN_crash:
				+        cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
				+        qemu_system_guest_panicked(NULL);
				+        return 0;
				+
				+    case SHUTDOWN_reboot:
				+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
				+        return 0;
				+
				+    case SHUTDOWN_poweroff:
				+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
				+        return 0;
				+
				+    case SHUTDOWN_soft_reset: {
				+        int rc;
				+
				+        /* kvm_xen_soft_reset() asserts that the iothread mutex is held. */
				+        qemu_mutex_lock_iothread();
				+        rc = kvm_xen_soft_reset();
				+        qemu_mutex_unlock_iothread();
				+        return rc;
				+    }
				+
				+    default:
				+        return -EINVAL;
				+    }
				+}
			
 
				+
			
 
				+/*
				+ * Dispatch a HYPERVISOR_sched_op hypercall.
				+ *
				+ * @cmd is the SCHEDOP_* sub-operation and @arg the guest virtual
				+ * address of its argument, if any. Returns false for sub-operations
				+ * not implemented here, otherwise true with the outcome in
				+ * exit->u.hcall.result.
				+ */
				+static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                   int cmd, uint64_t arg)
				+{
				+    CPUState *cs = CPU(cpu);
				+    int rc;
				+
				+    if (cmd == SCHEDOP_shutdown) {
				+        rc = schedop_shutdown(cs, arg);
				+    } else if (cmd == SCHEDOP_poll || cmd == SCHEDOP_yield) {
				+        /*
				+         * SCHEDOP_poll is treated as a plain yield. Linux panics if the
				+         * call fails, so it has to succeed, but it isn't worth doing
				+         * more: once event channels are handled in KVM the kernel
				+         * intercepts this hypercall and it never reaches QEMU. The
				+         * hypercall's semantics explicitly permit spurious wakeups.
				+         */
				+        sched_yield();
				+        rc = 0;
				+    } else {
				+        return false;
				+    }
				+
				+    exit->u.hcall.result = rc;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * Dispatch a HYPERVISOR_grant_table_op hypercall.
				+ *
				+ * @cmd is the GNTTABOP_* sub-operation and @arg the guest virtual
				+ * address of its argument structure; @count is not used by any
				+ * sub-operation handled here. The version and size queries copy their
				+ * structure in, call the matching xen_gnttab_* helper and copy the
				+ * structure back when the helper succeeded.
				+ *
				+ * Returns false for the table setup, copy, map, unmap and swap
				+ * operations, which are not handled here. Returns true otherwise, with
				+ * the outcome (-ENOSYS for any unrecognised sub-operation) in
				+ * exit->u.hcall.result.
				+ */
				+static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                    int cmd, uint64_t arg, int count)
				+{
				+    CPUState *vcpu = CPU(cpu);
				+    int rc;
				+
				+    switch (cmd) {
				+    case GNTTABOP_set_version: {
				+        struct gnttab_set_version req;
				+
				+        qemu_build_assert(sizeof(req) == 4);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_gnttab_set_version_op(&req);
				+            if (!rc && kvm_copy_to_gva(vcpu, arg, &req, sizeof(req))) {
				+                rc = -EFAULT;
				+            }
				+        }
				+        break;
				+    }
				+    case GNTTABOP_get_version: {
				+        struct gnttab_get_version req;
				+
				+        qemu_build_assert(sizeof(req) == 8);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_gnttab_get_version_op(&req);
				+            if (!rc && kvm_copy_to_gva(vcpu, arg, &req, sizeof(req))) {
				+                rc = -EFAULT;
				+            }
				+        }
				+        break;
				+    }
				+    case GNTTABOP_query_size: {
				+        struct gnttab_query_size req;
				+
				+        qemu_build_assert(sizeof(req) == 16);
				+        if (kvm_copy_from_gva(vcpu, arg, &req, sizeof(req))) {
				+            rc = -EFAULT;
				+        } else {
				+            rc = xen_gnttab_query_size_op(&req);
				+            if (!rc && kvm_copy_to_gva(vcpu, arg, &req, sizeof(req))) {
				+                rc = -EFAULT;
				+            }
				+        }
				+        break;
				+    }
				+    case GNTTABOP_setup_table:
				+    case GNTTABOP_copy:
				+    case GNTTABOP_map_grant_ref:
				+    case GNTTABOP_unmap_grant_ref:
				+    case GNTTABOP_swap_grant_ref:
				+        /* Known operations, but reported to the caller as unhandled. */
				+        return false;
				+
				+    default:
				+        /* Xen itself answers -ENOSYS to HVM guests for everything else. */
				+        rc = -ENOSYS;
				+        break;
				+    }
				+
				+    exit->u.hcall.result = rc;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * Dispatch a HYPERVISOR_physdev_op hypercall.
				+ *
				+ * @cmd is the PHYSDEVOP_* sub-operation and @arg the guest virtual
				+ * address of its argument structure. Each handled sub-operation copies
				+ * its structure in, calls the matching xen_physdev_* helper and copies
				+ * the structure back when the helper succeeded; a failed copy in
				+ * either direction gives -EFAULT.
				+ *
				+ * Returns false for sub-operations not implemented here, otherwise
				+ * true with the outcome in exit->u.hcall.result.
				+ */
				+static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
				+                                     int cmd, uint64_t arg)
				+{
				+    CPUState *cs = CPU(cpu);
				+    int err;
				+
				+    switch (cmd) {
				+    case PHYSDEVOP_map_pirq: {
				+        struct physdev_map_pirq map;
				+
				+        if (hypercall_compat32(exit->u.hcall.longmode)) {
				+            struct compat_physdev_map_pirq *map32 = (void *)&map;
				+
				+            if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
				+                /*
				+                 * Report the fault through the hypercall result like
				+                 * every other path. Returning -EFAULT from this bool
				+                 * function would read as "handled" while leaving the
				+                 * result unset.
				+                 */
				+                err = -EFAULT;
				+                break;
				+            }
				+
				+            /*
				+             * The only thing that's different is the alignment of the
				+             * uint64_t table_base at the end, which gets padding to make
				+             * it 64-bit aligned in the 64-bit version.
				+             */
				+            qemu_build_assert(sizeof(*map32) == 36);
				+            qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
				+                              offsetof(struct compat_physdev_map_pirq, entry_nr));
				+            /* Both views share storage, so move table_base into place. */
				+            memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
				+        } else {
				+            if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
				+                err = -EFAULT;
				+                break;
				+            }
				+        }
				+        err = xen_physdev_map_pirq(&map);
				+        /*
				+         * Since table_base is an IN parameter and won't be changed, just
				+         * copy the size of the compat structure back to the guest.
				+         */
				+        if (!err && kvm_copy_to_gva(cs, arg, &map,
				+                                    sizeof(struct compat_physdev_map_pirq))) {
				+            err = -EFAULT;
				+        }
				+        break;
				+    }
				+    case PHYSDEVOP_unmap_pirq: {
				+        struct physdev_unmap_pirq unmap;
				+
				+        qemu_build_assert(sizeof(unmap) == 8);
				+        if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
				+            err = -EFAULT;
				+            break;
				+        }
				+
				+        err = xen_physdev_unmap_pirq(&unmap);
				+        if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
				+            err = -EFAULT;
				+        }
				+        break;
				+    }
				+    case PHYSDEVOP_eoi: {
				+        struct physdev_eoi eoi;
				+
				+        qemu_build_assert(sizeof(eoi) == 4);
				+        if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
				+            err = -EFAULT;
				+            break;
				+        }
				+
				+        err = xen_physdev_eoi_pirq(&eoi);
				+        if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
				+            err = -EFAULT;
				+        }
				+        break;
				+    }
				+    case PHYSDEVOP_irq_status_query: {
				+        struct physdev_irq_status_query query;
				+
				+        qemu_build_assert(sizeof(query) == 8);
				+        if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
				+            err = -EFAULT;
				+            break;
				+        }
				+
				+        err = xen_physdev_query_pirq(&query);
				+        if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
				+            err = -EFAULT;
				+        }
				+        break;
				+    }
				+    case PHYSDEVOP_get_free_pirq: {
				+        struct physdev_get_free_pirq get;
				+
				+        qemu_build_assert(sizeof(get) == 8);
				+        if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
				+            err = -EFAULT;
				+            break;
				+        }
				+
				+        err = xen_physdev_get_free_pirq(&get);
				+        if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
				+            err = -EFAULT;
				+        }
				+        break;
				+    }
				+    case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */
				+        err = -ENOSYS;
				+        break;
				+
				+    default:
				+        return false;
				+    }
				+
				+    exit->u.hcall.result = err;
				+    return true;
				+}
			
 
				+
			
 
				+/*
				+ * Route a Xen hypercall exit to the handler for its hypercall number.
				+ *
				+ * @cpu:  vCPU that issued the hypercall
				+ * @exit: hypercall exit record; input, cpl, longmode and params[] are read
				+ *        here, and result is written here for the privilege failure
				+ *
				+ * Returns true when the privilege check rejected the call, otherwise the
				+ * verdict of the selected handler. Returns false when the hypercall number
				+ * matches no case, which lets the caller report it as unimplemented.
				+ */
				+static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
				+{
				+    /*
				+     * Compare the hypercall number at full width. Narrowing it to 16 bits
				+     * first would let an out-of-range number alias an implemented hypercall
				+     * instead of falling through to the default case.
				+     */
				+    uint64_t code = exit->u.hcall.input;
				+
				+    /* Hypercalls are only accepted from CPL 0. */
				+    if (exit->u.hcall.cpl > 0) {
				+        exit->u.hcall.result = -EPERM;
				+        return true;
				+    }
				+
				+    switch (code) {
				+    case __HYPERVISOR_set_timer_op:
				+        if (exit->u.hcall.longmode) {
				+            return kvm_xen_hcall_set_timer_op(exit, cpu,
				+                                              exit->u.hcall.params[0]);
				+        } else {
				+            /* In 32-bit mode, the 64-bit timer value is in two args. */
				+            uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
				+                (uint32_t)exit->u.hcall.params[0];
				+            return kvm_xen_hcall_set_timer_op(exit, cpu, val);
				+        }
				+    case __HYPERVISOR_grant_table_op:
				+        return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
				+                                       exit->u.hcall.params[1],
				+                                       exit->u.hcall.params[2]);
				+    case __HYPERVISOR_sched_op:
				+        return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
				+                                      exit->u.hcall.params[1]);
				+    case __HYPERVISOR_event_channel_op:
				+        return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
				+                                       exit->u.hcall.params[1]);
				+    case __HYPERVISOR_vcpu_op:
				+        return kvm_xen_hcall_vcpu_op(exit, cpu,
				+                                     exit->u.hcall.params[0],
				+                                     exit->u.hcall.params[1],
				+                                     exit->u.hcall.params[2]);
				+    case __HYPERVISOR_hvm_op:
				+        return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
				+                                    exit->u.hcall.params[1]);
				+    case __HYPERVISOR_memory_op:
				+        return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
				+                                       exit->u.hcall.params[1]);
				+    case __HYPERVISOR_physdev_op:
				+        return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
				+                                        exit->u.hcall.params[1]);
				+    case __HYPERVISOR_xen_version:
				+        return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
				+                                         exit->u.hcall.params[1]);
				+    default:
				+        /* Unknown hypercall number: leave the reporting to the caller. */
				+        return false;
				+    }
				+}
			
 
				+
			
 
				+/*
				+ * Handle a Xen exit reported for a vCPU.
				+ *
				+ * @cpu:  vCPU that took the exit
				+ * @exit: exit record; only KVM_EXIT_XEN_HCALL is handled
				+ *
				+ * Returns -1 when the exit is not a hypercall exit. Otherwise returns 0
				+ * after the hypercall has been dispatched; hypercalls that no handler
				+ * accepted get -ENOSYS as their result and are logged as unimplemented.
				+ */
				+int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
				+{
				+    if (exit->type != KVM_EXIT_XEN_HCALL) {
				+        return -1;
				+    }
				+
				+    /*
				+     * The kernel latches the guest 32/64 mode when the MSR is used to fill
				+     * the hypercall page. So if we see a hypercall in a mode that doesn't
				+     * match our own idea of the guest mode, fetch the kernel's idea of the
				+     * "long mode" to remain in sync.
				+     */
				+    if (exit->u.hcall.longmode != xen_is_long_mode()) {
				+        xen_sync_long_mode();
				+    }
				+
				+    if (!do_kvm_xen_handle_exit(cpu, exit)) {
				+        /*
				+         * Some hypercalls will be deliberately "implemented" by returning
				+         * -ENOSYS. This case is for hypercalls which are unexpected.
				+         */
				+        exit->u.hcall.result = -ENOSYS;
				+        /*
				+         * The hypercall number is passed as uint64_t, so it is printed
				+         * with the unsigned conversion rather than a signed one.
				+         */
				+        qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
				+                      PRIu64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
				+                      (uint64_t)exit->u.hcall.input,
				+                      (uint64_t)exit->u.hcall.params[0],
				+                      (uint64_t)exit->u.hcall.params[1],
				+                      (uint64_t)exit->u.hcall.params[2]);
				+    }
				+
				+    /* Traced for every hypercall exit, whether or not a handler took it. */
				+    trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
				+                            exit->u.hcall.input, exit->u.hcall.params[0],
				+                            exit->u.hcall.params[1], exit->u.hcall.params[2],
				+                            exit->u.hcall.result);
				+    return 0;
				+}
			
 
				+
			
 
				+/* Report the xen_gnttab_max_frames value held by the current accelerator. */
				+uint16_t kvm_xen_get_gnttab_max_frames(void)
				+{
				+    return KVM_STATE(current_accel())->xen_gnttab_max_frames;
				+}
			
 
				+
			
 
				+/* Report the xen_evtchn_max_pirq value held by the current accelerator. */
				+uint16_t kvm_xen_get_evtchn_max_pirq(void)
				+{
				+    return KVM_STATE(current_accel())->xen_evtchn_max_pirq;
				+}
			
 
				+
			
 
				+/*
				+ * Push the Xen state recorded in the vCPU's env back to the kernel.
				+ *
				+ * @cs: vCPU whose state is written
				+ *
				+ * Each piece is only written when env holds a value for it. Returns 0 on
				+ * success, or the first negative value reported by one of the helpers.
				+ */
				+int kvm_put_xen_state(CPUState *cs)
				+{
				+    CPUX86State *env = &X86_CPU(cs)->env;
				+    uint64_t addr;
				+    int rc;
				+
				+    /* Use the explicit vcpu_info address, else fall back to the default. */
				+    addr = env->xen_vcpu_info_gpa != INVALID_GPA ?
				+           env->xen_vcpu_info_gpa : env->xen_vcpu_info_default_gpa;
				+    if (addr != INVALID_GPA) {
				+        rc = set_vcpu_info(cs, addr);
				+        if (rc < 0) {
				+            return rc;
				+        }
				+    }
				+
				+    addr = env->xen_vcpu_time_info_gpa;
				+    if (addr != INVALID_GPA) {
				+        rc = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
				+                                   addr);
				+        if (rc < 0) {
				+            return rc;
				+        }
				+    }
				+
				+    addr = env->xen_vcpu_runstate_gpa;
				+    if (addr != INVALID_GPA) {
				+        rc = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
				+                                   addr);
				+        if (rc < 0) {
				+            return rc;
				+        }
				+    }
				+
				+    if (env->xen_periodic_timer_period) {
				+        rc = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
				+        if (rc < 0) {
				+            return rc;
				+        }
				+    }
				+
				+    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
				+        /*
				+         * Without EVTCHN_SEND the singleshot timer is re-armed here and
				+         * nothing further is written. With EVTCHN_SEND the kernel handles
				+         * timers too, and kvm_xen_set_vcpu_timer() below restores it.
				+         */
				+        if (!env->xen_singleshot_timer_ns) {
				+            return 0;
				+        }
				+        rc = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
				+                                     false, false);
				+        return rc < 0 ? rc : 0;
				+    }
				+
				+    if (env->xen_vcpu_callback_vector) {
				+        rc = kvm_xen_set_vcpu_callback_vector(cs);
				+        if (rc < 0) {
				+            return rc;
				+        }
				+    }
				+
				+    if (env->xen_virq[VIRQ_TIMER]) {
				+        rc = kvm_xen_set_vcpu_timer(cs);
				+        if (rc < 0) {
				+            return rc;
				+        }
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/*
				+ * Pull Xen state for one vCPU out of the kernel.
				+ *
				+ * @cs: vCPU whose state is read
				+ *
				+ * Marks the vcpu_info area dirty and, when the kernel has EVTCHN_SEND
				+ * support and VIRQ_TIMER is set up, reads back the singleshot timer
				+ * deadline into env. Returns 0 on success or the negative value returned
				+ * by the KVM_XEN_VCPU_GET_ATTR ioctl.
				+ */
				+int kvm_get_xen_state(CPUState *cs)
				+{
				+    X86CPU *cpu = X86_CPU(cs);
				+    CPUX86State *env = &cpu->env;
				+    uint64_t gpa;
				+    int ret;
				+
				+    /*
				+     * The kernel does not mark vcpu_info as dirty when it delivers interrupts
				+     * to it. It's up to userspace to *assume* that any page shared thus is
				+     * always considered dirty. The shared_info page is different since it's
				+     * an overlay and migrated separately anyway.
				+     */
				+    gpa = env->xen_vcpu_info_gpa;
				+    if (gpa == INVALID_GPA) {
				+        gpa = env->xen_vcpu_info_default_gpa;
				+    }
				+    if (gpa != INVALID_GPA) {
				+        MemoryRegionSection mrs = memory_region_find(get_system_memory(),
				+                                                     gpa,
				+                                                     sizeof(struct vcpu_info));
				+        if (mrs.mr) {
				+            /* Only touch the region if it covers the whole structure. */
				+            if (!int128_lt(mrs.size,
				+                           int128_make64(sizeof(struct vcpu_info)))) {
				+                memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
				+                                        sizeof(struct vcpu_info));
				+            }
				+            /*
				+             * memory_region_find() hands back the region with a reference
				+             * held; drop it so repeated calls do not leak references.
				+             */
				+            memory_region_unref(mrs.mr);
				+        }
				+    }
				+
				+    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
				+        return 0;
				+    }
				+
				+    /*
				+     * If the kernel is accelerating timers, read out the current value of the
				+     * singleshot timer deadline.
				+     */
				+    if (env->xen_virq[VIRQ_TIMER]) {
				+        struct kvm_xen_vcpu_attr va = {
				+            .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
				+        };
				+        ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
				+        if (ret < 0) {
				+            return ret;
				+        }
				+        env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
				+    }
				+
				+    return 0;
				+}
			
--- a/target/i386/kvm/xen-emu.h
+++ b/target/i386/kvm/xen-emu.h
@@ -0,0 +1,33 @@
 
				+/*
			
 
				+ * Xen HVM emulation support in KVM
			
 
				+ *
			
 
				+ * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
			
 
				+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
			
 
				+ *
			
 
				+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
			
 
				+ * See the COPYING file in the top-level directory.
			
 
				+ *
			
 
				+ */
			
 
				+
			
 
				+#ifndef QEMU_I386_KVM_XEN_EMU_H
			
 
				+#define QEMU_I386_KVM_XEN_EMU_H
			
 
				+
			
 
				+/*
			
 
				+ * MSR index constants. Presumably these are candidate values for the
			
 
				+ * hypercall_msr argument of kvm_xen_init() -- TODO confirm against callers.
			
 
				+ */
			
 
				+#define XEN_HYPERCALL_MSR               0x40000000
			
 
				+#define XEN_HYPERCALL_MSR_HYPERV        0x40000200
			
 
				+
			
 
				+/*
			
 
				+ * Small indices naming Xen CPUID leaves. NOTE(review): these look like
			
 
				+ * offsets from a Xen CPUID base leaf -- confirm against the users.
			
 
				+ */
			
 
				+#define XEN_CPUID_SIGNATURE        0
			
 
				+#define XEN_CPUID_VENDOR           1
			
 
				+#define XEN_CPUID_HVM_MSR          2
			
 
				+#define XEN_CPUID_TIME             3
			
 
				+#define XEN_CPUID_HVM              4
			
 
				+
			
 
				+/* Pack a version pair into one integer: maj shifted left by 16, OR min. */
			
 
				+#define XEN_VERSION(maj, min) ((maj) << 16 | (min))
			
 
				+
			
 
				+/*
			
 
				+ * Entry points of the KVM Xen emulation. kvm_xen_handle_exit() returns -1
			
 
				+ * for exits that are not KVM_EXIT_XEN_HCALL and 0 otherwise;
			
 
				+ * kvm_put_xen_state() and kvm_get_xen_state() return 0 on success or a
			
 
				+ * negative value from the failing helper.
			
 
				+ */
			
 
				+int kvm_xen_init(KVMState *s, uint32_t hypercall_msr);
			
 
				+int kvm_xen_init_vcpu(CPUState *cs);
			
 
				+int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit);
			
 
				+int kvm_put_xen_state(CPUState *cs);
			
 
				+int kvm_get_xen_state(CPUState *cs);
			
 
				+void kvm_xen_maybe_deassert_callback(CPUState *cs);
			
 
				+
			
 
				+#endif /* QEMU_I386_KVM_XEN_EMU_H */
			
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -6,8 +6,10 @@
 
				 #include "kvm/hyperv.h"
			
 
				 #include "hw/i386/x86.h"
			
 
				 #include "kvm/kvm_i386.h"
			
 
				+#include "hw/xen/xen.h"
			
 
				 
			
 
				 #include "sysemu/kvm.h"
			
 
				+#include "sysemu/kvm_xen.h"
			
 
				 #include "sysemu/tcg.h"
			
 
				 
			
 
				 #include "qemu/error-report.h"
			
@@ -1257,6 +1259,28 @@ static const VMStateDescription vmstate_nested_state = {
 
				     }
			
 
				 };
			
 
				 
			
 
				+/* .needed hook for vmstate_xen_vcpu: true only while emulating Xen. */
				+static bool xen_vcpu_needed(void *opaque)
				+{
				+    return xen_mode == XEN_EMULATE;
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Per-vCPU Xen state of X86CPU, described under the name "cpu/xen_vcpu".
			
 
				+ * xen_vcpu_needed() is its .needed hook, tying it to xen_mode == XEN_EMULATE.
			
 
				+ * NOTE(review): the field order presumably defines the stream layout; do
			
 
				+ * not reorder without confirming.
			
 
				+ */
			
 
				+static const VMStateDescription vmstate_xen_vcpu = {
			
 
				+    .name = "cpu/xen_vcpu",
			
 
				+    .version_id = 1,
			
 
				+    .minimum_version_id = 1,
			
 
				+    .needed = xen_vcpu_needed,
			
 
				+    .fields = (VMStateField[]) {
			
 
				+        /* Guest physical addresses of the per-vCPU structures. */
			
 
				+        VMSTATE_UINT64(env.xen_vcpu_info_gpa, X86CPU),
			
 
				+        VMSTATE_UINT64(env.xen_vcpu_info_default_gpa, X86CPU),
			
 
				+        VMSTATE_UINT64(env.xen_vcpu_time_info_gpa, X86CPU),
			
 
				+        VMSTATE_UINT64(env.xen_vcpu_runstate_gpa, X86CPU),
			
 
				+        /* Callback vector and the XEN_NR_VIRQS-entry xen_virq array. */
			
 
				+        VMSTATE_UINT8(env.xen_vcpu_callback_vector, X86CPU),
			
 
				+        VMSTATE_UINT16_ARRAY(env.xen_virq, X86CPU, XEN_NR_VIRQS),
			
 
				+        /* Timer state: singleshot deadline and periodic timer period. */
			
 
				+        VMSTATE_UINT64(env.xen_singleshot_timer_ns, X86CPU),
			
 
				+        VMSTATE_UINT64(env.xen_periodic_timer_period, X86CPU),
			
 
				+        VMSTATE_END_OF_LIST()
			
 
				+    }
			
 
				+};
			
 
				 #endif
			
 
				 
			
 
				 static bool mcg_ext_ctl_needed(void *opaque)
			
@@ -1716,6 +1740,7 @@ const VMStateDescription vmstate_x86_cpu = {
 
				 #endif
			
 
				 #ifdef CONFIG_KVM
			
 
				         &vmstate_nested_state,
			
 
				+        &vmstate_xen_vcpu,
			
 
				 #endif
			
 
				         &vmstate_msr_tsx_ctrl,
			
 
				         &vmstate_msr_intel_sgx,
			
--- a/tests/qtest/qmp-cmd-test.c
+++ b/tests/qtest/qmp-cmd-test.c
@@ -54,6 +54,7 @@ static int query_error_class(const char *cmd)
 
				         /* Only valid with accel=tcg */
			
 
				         { "x-query-jit", ERROR_CLASS_GENERIC_ERROR },
			
 
				         { "x-query-opcount", ERROR_CLASS_GENERIC_ERROR },
			
 
				+        { "xen-event-list", ERROR_CLASS_GENERIC_ERROR },
			
 
				         { NULL, -1 }
			
 
				     };
			
 
				     int i;