Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* core support for MemoryRegionCache from myself
* rules.mak speedup and cleanups from myself and Marc-André
* multiboot command line fix from Vlad
* SCSI fixes from myself
* small qemu-timer speedup from myself
* x86 debugging improvements from Doug
* configurable Q35 devices from Chao
* x86 5-level paging support from Kirill
* x86 SHA_NI support for KVM from Yi Sun
* improved kvmclock migration logic from Marcelo
* bugfixes and doc fixes from others

# gpg: Signature made Thu 22 Dec 2016 15:01:13 GMT
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (25 commits)
  x86: implement la57 paging mode
  target-i386: Fix eflags.TF/#DB handling of syscall/sysret insns
  kvmclock: reduce kvmclock difference on migration
  kvm: sync linux headers
  scsi-disk: fix VERIFY for scsi-block
  hw/block/pflash_cfi*.c: fix confusing assert fail message
  multiboot: copy the cmdline verbatim, unescape module strings
  x86: Fix x86_64 'g' packet response to gdb from 32-bit mode.
  pc: make pit configurable
  pc: make sata configurable
  pc: make smbus configurable
  target-i386: Add Intel SHA_NI instruction support.
  block: drop remaining legacy aio functions in comment
  main-loop: update comment for qemu_mutex_lock/unlock_iothread
  timer: fix misleading comment in timer.h
  qemu-timer: check active_timers outside lock/event
  virtio-scsi: introduce virtio_scsi_acquire/release
  build-sys: remove libtool left-over
  rules.mak: add more rules to avoid chaining
  rules.mak: speedup save-vars load-vars
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 8 years ago
parent
commit
a470b33259

+ 0 - 4
.gitignore

@@ -82,10 +82,6 @@
 *.d
 !/scripts/qemu-guest-agent/fsfreeze-hook.d
 *.o
-*.lo
-*.la
-*.pc
-.libs
 .sdk
 *.gcda
 *.gcno

+ 3 - 6
Makefile

@@ -231,12 +231,10 @@ ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))
 
 recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)
 
-$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo
+$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h
 	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.o")
-$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h
-	$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.lo")
 
-Makefile: $(version-obj-y) $(version-lobj-y)
+Makefile: $(version-obj-y)
 
 ######################################################################
 # Build libraries
@@ -358,10 +356,9 @@ clean:
 	rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
 	rm -f qemu-options.def
 	rm -f *.msi
-	find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
+	find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
 	rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -f fsdev/*.pod
-	rm -rf .libs */.libs
 	rm -f qemu-img-cmds.h
 	rm -f ui/shader/*-vert.h ui/shader/*-frag.h
 	@# May not be present in GENERATED_HEADERS

+ 0 - 1
Makefile.objs

@@ -97,7 +97,6 @@ common-obj-y += disas/
 ######################################################################
 # Resource file for Windows executables
 version-obj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.o
-version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
 
 ######################################################################
 # tracing

+ 1 - 0
Makefile.target

@@ -76,6 +76,7 @@ $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
 else
 stap:
 endif
+.PHONY: stap
 
 all: $(PROGS) stap
 

+ 0 - 2
configure

@@ -28,8 +28,6 @@ TMPB="qemu-conf"
 TMPC="${TMPDIR1}/${TMPB}.c"
 TMPO="${TMPDIR1}/${TMPB}.o"
 TMPCXX="${TMPDIR1}/${TMPB}.cxx"
-TMPL="${TMPDIR1}/${TMPB}.lo"
-TMPA="${TMPDIR1}/lib${TMPB}.la"
 TMPE="${TMPDIR1}/${TMPB}.exe"
 TMPMO="${TMPDIR1}/${TMPB}.mo"
 

+ 95 - 592
exec.c

@@ -2938,6 +2938,31 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_
     return true;
 }
 
+static hwaddr
+address_space_extend_translation(AddressSpace *as, hwaddr addr, hwaddr target_len,
+                                 MemoryRegion *mr, hwaddr base, hwaddr len,
+                                 bool is_write)
+{
+    hwaddr done = 0;
+    hwaddr xlat;
+    MemoryRegion *this_mr;
+
+    for (;;) {
+        target_len -= len;
+        addr += len;
+        done += len;
+        if (target_len == 0) {
+            return done;
+        }
+
+        len = target_len;
+        this_mr = address_space_translate(as, addr, &xlat, &len, is_write);
+        if (this_mr != mr || xlat != base + done) {
+            return done;
+        }
+    }
+}
+
 /* Map a physical memory region into a host virtual address.
  * May map a subset of the requested range, given by and returned in *plen.
  * May return NULL if resources needed to perform the mapping are exhausted.
@@ -2951,9 +2976,8 @@ void *address_space_map(AddressSpace *as,
                         bool is_write)
 {
     hwaddr len = *plen;
-    hwaddr done = 0;
-    hwaddr l, xlat, base;
-    MemoryRegion *mr, *this_mr;
+    hwaddr l, xlat;
+    MemoryRegion *mr;
     void *ptr;
 
     if (len == 0) {
@@ -2987,26 +3011,10 @@ void *address_space_map(AddressSpace *as,
         return bounce.buffer;
     }
 
-    base = xlat;
-
-    for (;;) {
-        len -= l;
-        addr += l;
-        done += l;
-        if (len == 0) {
-            break;
-        }
-
-        l = len;
-        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
-        if (this_mr != mr || xlat != base + done) {
-            break;
-        }
-    }
 
     memory_region_ref(mr);
-    *plen = done;
-    ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
+    *plen = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
+    ptr = qemu_ram_ptr_length(mr->ram_block, xlat, plen);
     rcu_read_unlock();
 
     return ptr;
@@ -3058,597 +3066,92 @@ void cpu_physical_memory_unmap(void *buffer, hwaddr len,
     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
 }
 
-/* warning: addr must be aligned */
-static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
-                                                  MemTxAttrs attrs,
-                                                  MemTxResult *result,
-                                                  enum device_endian endian)
-{
-    uint8_t *ptr;
-    uint64_t val;
-    MemoryRegion *mr;
-    hwaddr l = 4;
-    hwaddr addr1;
-    MemTxResult r;
-    bool release_lock = false;
-
-    rcu_read_lock();
-    mr = address_space_translate(as, addr, &addr1, &l, false);
-    if (l < 4 || !memory_access_is_direct(mr, false)) {
-        release_lock |= prepare_mmio_access(mr);
-
-        /* I/O case */
-        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
-#if defined(TARGET_WORDS_BIGENDIAN)
-        if (endian == DEVICE_LITTLE_ENDIAN) {
-            val = bswap32(val);
-        }
-#else
-        if (endian == DEVICE_BIG_ENDIAN) {
-            val = bswap32(val);
-        }
-#endif
-    } else {
-        /* RAM case */
-        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
-        switch (endian) {
-        case DEVICE_LITTLE_ENDIAN:
-            val = ldl_le_p(ptr);
-            break;
-        case DEVICE_BIG_ENDIAN:
-            val = ldl_be_p(ptr);
-            break;
-        default:
-            val = ldl_p(ptr);
-            break;
-        }
-        r = MEMTX_OK;
-    }
-    if (result) {
-        *result = r;
-    }
-    if (release_lock) {
-        qemu_mutex_unlock_iothread();
-    }
-    rcu_read_unlock();
-    return val;
-}
-
-uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
-                           MemTxAttrs attrs, MemTxResult *result)
-{
-    return address_space_ldl_internal(as, addr, attrs, result,
-                                      DEVICE_NATIVE_ENDIAN);
-}
-
-uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
-                              MemTxAttrs attrs, MemTxResult *result)
-{
-    return address_space_ldl_internal(as, addr, attrs, result,
-                                      DEVICE_LITTLE_ENDIAN);
-}
-
-uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
-                              MemTxAttrs attrs, MemTxResult *result)
-{
-    return address_space_ldl_internal(as, addr, attrs, result,
-                                      DEVICE_BIG_ENDIAN);
-}
-
-uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* warning: addr must be aligned */
-static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
-                                                  MemTxAttrs attrs,
-                                                  MemTxResult *result,
-                                                  enum device_endian endian)
-{
-    uint8_t *ptr;
-    uint64_t val;
-    MemoryRegion *mr;
-    hwaddr l = 8;
-    hwaddr addr1;
-    MemTxResult r;
-    bool release_lock = false;
-
-    rcu_read_lock();
-    mr = address_space_translate(as, addr, &addr1, &l,
-                                 false);
-    if (l < 8 || !memory_access_is_direct(mr, false)) {
-        release_lock |= prepare_mmio_access(mr);
-
-        /* I/O case */
-        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
-#if defined(TARGET_WORDS_BIGENDIAN)
-        if (endian == DEVICE_LITTLE_ENDIAN) {
-            val = bswap64(val);
-        }
-#else
-        if (endian == DEVICE_BIG_ENDIAN) {
-            val = bswap64(val);
-        }
-#endif
-    } else {
-        /* RAM case */
-        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
-        switch (endian) {
-        case DEVICE_LITTLE_ENDIAN:
-            val = ldq_le_p(ptr);
-            break;
-        case DEVICE_BIG_ENDIAN:
-            val = ldq_be_p(ptr);
-            break;
-        default:
-            val = ldq_p(ptr);
-            break;
-        }
-        r = MEMTX_OK;
-    }
-    if (result) {
-        *result = r;
-    }
-    if (release_lock) {
-        qemu_mutex_unlock_iothread();
-    }
-    rcu_read_unlock();
-    return val;
-}
-
-uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
-                           MemTxAttrs attrs, MemTxResult *result)
-{
-    return address_space_ldq_internal(as, addr, attrs, result,
-                                      DEVICE_NATIVE_ENDIAN);
-}
-
-uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
-                           MemTxAttrs attrs, MemTxResult *result)
-{
-    return address_space_ldq_internal(as, addr, attrs, result,
-                                      DEVICE_LITTLE_ENDIAN);
-}
-
-uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
-                           MemTxAttrs attrs, MemTxResult *result)
-{
-    return address_space_ldq_internal(as, addr, attrs, result,
-                                      DEVICE_BIG_ENDIAN);
-}
-
-uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* XXX: optimize */
-uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
-                            MemTxAttrs attrs, MemTxResult *result)
-{
-    uint8_t val;
-    MemTxResult r;
-
-    r = address_space_rw(as, addr, attrs, &val, 1, 0);
-    if (result) {
-        *result = r;
-    }
-    return val;
-}
-
-uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* warning: addr must be aligned */
-static inline uint32_t address_space_lduw_internal(AddressSpace *as,
-                                                   hwaddr addr,
-                                                   MemTxAttrs attrs,
-                                                   MemTxResult *result,
-                                                   enum device_endian endian)
-{
-    uint8_t *ptr;
-    uint64_t val;
-    MemoryRegion *mr;
-    hwaddr l = 2;
-    hwaddr addr1;
-    MemTxResult r;
-    bool release_lock = false;
-
-    rcu_read_lock();
-    mr = address_space_translate(as, addr, &addr1, &l,
-                                 false);
-    if (l < 2 || !memory_access_is_direct(mr, false)) {
-        release_lock |= prepare_mmio_access(mr);
-
-        /* I/O case */
-        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
-#if defined(TARGET_WORDS_BIGENDIAN)
-        if (endian == DEVICE_LITTLE_ENDIAN) {
-            val = bswap16(val);
-        }
-#else
-        if (endian == DEVICE_BIG_ENDIAN) {
-            val = bswap16(val);
-        }
-#endif
-    } else {
-        /* RAM case */
-        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
-        switch (endian) {
-        case DEVICE_LITTLE_ENDIAN:
-            val = lduw_le_p(ptr);
-            break;
-        case DEVICE_BIG_ENDIAN:
-            val = lduw_be_p(ptr);
-            break;
-        default:
-            val = lduw_p(ptr);
-            break;
-        }
-        r = MEMTX_OK;
-    }
-    if (result) {
-        *result = r;
-    }
-    if (release_lock) {
-        qemu_mutex_unlock_iothread();
-    }
-    rcu_read_unlock();
-    return val;
-}
-
-uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
-                           MemTxAttrs attrs, MemTxResult *result)
-{
-    return address_space_lduw_internal(as, addr, attrs, result,
-                                       DEVICE_NATIVE_ENDIAN);
-}
-
-uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
-                           MemTxAttrs attrs, MemTxResult *result)
-{
-    return address_space_lduw_internal(as, addr, attrs, result,
-                                       DEVICE_LITTLE_ENDIAN);
-}
-
-uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
-                           MemTxAttrs attrs, MemTxResult *result)
-{
-    return address_space_lduw_internal(as, addr, attrs, result,
-                                       DEVICE_BIG_ENDIAN);
-}
-
-uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
-{
-    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* warning: addr must be aligned. The ram page is not masked as dirty
-   and the code inside is not invalidated. It is useful if the dirty
-   bits are used to track modified PTEs */
-void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
-                                MemTxAttrs attrs, MemTxResult *result)
-{
-    uint8_t *ptr;
-    MemoryRegion *mr;
-    hwaddr l = 4;
-    hwaddr addr1;
-    MemTxResult r;
-    uint8_t dirty_log_mask;
-    bool release_lock = false;
-
-    rcu_read_lock();
-    mr = address_space_translate(as, addr, &addr1, &l,
-                                 true);
-    if (l < 4 || !memory_access_is_direct(mr, true)) {
-        release_lock |= prepare_mmio_access(mr);
-
-        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
-    } else {
-        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
-        stl_p(ptr, val);
-
-        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
-        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
-        cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
-                                            4, dirty_log_mask);
-        r = MEMTX_OK;
-    }
-    if (result) {
-        *result = r;
-    }
-    if (release_lock) {
-        qemu_mutex_unlock_iothread();
-    }
-    rcu_read_unlock();
-}
-
-void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
-{
-    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* warning: addr must be aligned */
-static inline void address_space_stl_internal(AddressSpace *as,
-                                              hwaddr addr, uint32_t val,
-                                              MemTxAttrs attrs,
-                                              MemTxResult *result,
-                                              enum device_endian endian)
-{
-    uint8_t *ptr;
-    MemoryRegion *mr;
-    hwaddr l = 4;
-    hwaddr addr1;
-    MemTxResult r;
-    bool release_lock = false;
-
-    rcu_read_lock();
-    mr = address_space_translate(as, addr, &addr1, &l,
-                                 true);
-    if (l < 4 || !memory_access_is_direct(mr, true)) {
-        release_lock |= prepare_mmio_access(mr);
-
-#if defined(TARGET_WORDS_BIGENDIAN)
-        if (endian == DEVICE_LITTLE_ENDIAN) {
-            val = bswap32(val);
-        }
-#else
-        if (endian == DEVICE_BIG_ENDIAN) {
-            val = bswap32(val);
-        }
-#endif
-        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
-    } else {
-        /* RAM case */
-        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
-        switch (endian) {
-        case DEVICE_LITTLE_ENDIAN:
-            stl_le_p(ptr, val);
-            break;
-        case DEVICE_BIG_ENDIAN:
-            stl_be_p(ptr, val);
-            break;
-        default:
-            stl_p(ptr, val);
-            break;
-        }
-        invalidate_and_set_dirty(mr, addr1, 4);
-        r = MEMTX_OK;
-    }
-    if (result) {
-        *result = r;
-    }
-    if (release_lock) {
-        qemu_mutex_unlock_iothread();
-    }
-    rcu_read_unlock();
-}
-
-void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
-{
-    address_space_stl_internal(as, addr, val, attrs, result,
-                               DEVICE_NATIVE_ENDIAN);
-}
-
-void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
-{
-    address_space_stl_internal(as, addr, val, attrs, result,
-                               DEVICE_LITTLE_ENDIAN);
-}
-
-void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
-{
-    address_space_stl_internal(as, addr, val, attrs, result,
-                               DEVICE_BIG_ENDIAN);
-}
-
-void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
-    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
-    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
-    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-/* XXX: optimize */
-void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
-{
-    uint8_t v = val;
-    MemTxResult r;
-
-    r = address_space_rw(as, addr, attrs, &v, 1, 1);
-    if (result) {
-        *result = r;
-    }
-}
-
-void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
-    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
+#define ARG1_DECL                AddressSpace *as
+#define ARG1                     as
+#define SUFFIX
+#define TRANSLATE(...)           address_space_translate(as, __VA_ARGS__)
+#define IS_DIRECT(mr, is_write)  memory_access_is_direct(mr, is_write)
+#define MAP_RAM(mr, ofs)         qemu_map_ram_ptr((mr)->ram_block, ofs)
+#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
+#define RCU_READ_LOCK(...)       rcu_read_lock()
+#define RCU_READ_UNLOCK(...)     rcu_read_unlock()
+#include "memory_ldst.inc.c"
 
-/* warning: addr must be aligned */
-static inline void address_space_stw_internal(AddressSpace *as,
-                                              hwaddr addr, uint32_t val,
-                                              MemTxAttrs attrs,
-                                              MemTxResult *result,
-                                              enum device_endian endian)
+int64_t address_space_cache_init(MemoryRegionCache *cache,
+                                 AddressSpace *as,
+                                 hwaddr addr,
+                                 hwaddr len,
+                                 bool is_write)
 {
-    uint8_t *ptr;
+    hwaddr l, xlat;
     MemoryRegion *mr;
-    hwaddr l = 2;
-    hwaddr addr1;
-    MemTxResult r;
-    bool release_lock = false;
+    void *ptr;
 
-    rcu_read_lock();
-    mr = address_space_translate(as, addr, &addr1, &l, true);
-    if (l < 2 || !memory_access_is_direct(mr, true)) {
-        release_lock |= prepare_mmio_access(mr);
+    assert(len > 0);
 
-#if defined(TARGET_WORDS_BIGENDIAN)
-        if (endian == DEVICE_LITTLE_ENDIAN) {
-            val = bswap16(val);
-        }
-#else
-        if (endian == DEVICE_BIG_ENDIAN) {
-            val = bswap16(val);
-        }
-#endif
-        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
-    } else {
-        /* RAM case */
-        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
-        switch (endian) {
-        case DEVICE_LITTLE_ENDIAN:
-            stw_le_p(ptr, val);
-            break;
-        case DEVICE_BIG_ENDIAN:
-            stw_be_p(ptr, val);
-            break;
-        default:
-            stw_p(ptr, val);
-            break;
-        }
-        invalidate_and_set_dirty(mr, addr1, 2);
-        r = MEMTX_OK;
-    }
-    if (result) {
-        *result = r;
-    }
-    if (release_lock) {
-        qemu_mutex_unlock_iothread();
+    l = len;
+    mr = address_space_translate(as, addr, &xlat, &l, is_write);
+    if (!memory_access_is_direct(mr, is_write)) {
+        return -EINVAL;
     }
-    rcu_read_unlock();
-}
-
-void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
-{
-    address_space_stw_internal(as, addr, val, attrs, result,
-                               DEVICE_NATIVE_ENDIAN);
-}
 
-void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
-{
-    address_space_stw_internal(as, addr, val, attrs, result,
-                               DEVICE_LITTLE_ENDIAN);
-}
+    l = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
+    ptr = qemu_ram_ptr_length(mr->ram_block, xlat, &l);
 
-void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
-{
-    address_space_stw_internal(as, addr, val, attrs, result,
-                               DEVICE_BIG_ENDIAN);
-}
+    cache->xlat = xlat;
+    cache->is_write = is_write;
+    cache->mr = mr;
+    cache->ptr = ptr;
+    cache->len = l;
+    memory_region_ref(cache->mr);
 
-void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
-    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
-{
-    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
+    return l;
 }
 
-void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
+void address_space_cache_invalidate(MemoryRegionCache *cache,
+                                    hwaddr addr,
+                                    hwaddr access_len)
 {
-    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
+    assert(cache->is_write);
+    invalidate_and_set_dirty(cache->mr, addr + cache->xlat, access_len);
 }
 
-/* XXX: optimize */
-void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
+void address_space_cache_destroy(MemoryRegionCache *cache)
 {
-    MemTxResult r;
-    val = tswap64(val);
-    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
-    if (result) {
-        *result = r;
+    if (!cache->mr) {
+        return;
     }
-}
 
-void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
-{
-    MemTxResult r;
-    val = cpu_to_le64(val);
-    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
-    if (result) {
-        *result = r;
-    }
-}
-void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
-                       MemTxAttrs attrs, MemTxResult *result)
-{
-    MemTxResult r;
-    val = cpu_to_be64(val);
-    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
-    if (result) {
-        *result = r;
+    if (xen_enabled()) {
+        xen_invalidate_map_cache_entry(cache->ptr);
     }
+    memory_region_unref(cache->mr);
 }
 
-void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
-{
-    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
-{
-    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
-
-void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
-{
-    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
-}
+/* Called from RCU critical section.  This function has the same
+ * semantics as address_space_translate, but it only works on a
+ * predefined range of a MemoryRegion that was mapped with
+ * address_space_cache_init.
+ */
+static inline MemoryRegion *address_space_translate_cached(
+    MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
+    hwaddr *plen, bool is_write)
+{
+    assert(addr < cache->len && *plen <= cache->len - addr);
+    *xlat = addr + cache->xlat;
+    return cache->mr;
+}
+
+#define ARG1_DECL                MemoryRegionCache *cache
+#define ARG1                     cache
+#define SUFFIX                   _cached
+#define TRANSLATE(...)           address_space_translate_cached(cache, __VA_ARGS__)
+#define IS_DIRECT(mr, is_write)  true
+#define MAP_RAM(mr, ofs)         (cache->ptr + (ofs - cache->xlat))
+#define INVALIDATE(mr, ofs, len) ((void)0)
+#define RCU_READ_LOCK()          ((void)0)
+#define RCU_READ_UNLOCK()        ((void)0)
+#include "memory_ldst.inc.c"
 
 /* virtual memory access for debug (includes writing to ROM) */
 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,

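The exec.c hunk above does two things: it factors the translation-extension loop into address_space_extend_translation(), and it instantiates the new memory_ldst.inc.c template twice, with the ARG1/TRANSLATE/MAP_RAM macros selecting the primitives so that one include generates the ld*/st* accessor family for an AddressSpace and the other for a MemoryRegionCache. A minimal usage sketch of the cached variant follows; the device and desc_gpa are hypothetical, not from the patch, and error handling is reduced to a fallback comment.

    #include "qemu/osdep.h"
    #include "exec/memory.h"
    #include "exec/address-spaces.h"

    /* Hypothetical device code: access a 16-byte descriptor in guest RAM
     * at desc_gpa repeatedly, without re-translating on every access. */
    static void update_descriptor(hwaddr desc_gpa)
    {
        MemoryRegionCache cache;
        int64_t len;
        uint32_t flags;

        len = address_space_cache_init(&cache, &address_space_memory,
                                       desc_gpa, 16, true /* is_write */);
        if (len < 16) {
            /* Not RAM, or only partially mapped: fall back to the
             * uncached address_space_* accessors. */
            return;
        }

        /* Addresses are relative to desc_gpa. */
        flags = ldl_le_phys_cached(&cache, 8);
        stl_le_phys_cached(&cache, 8, flags | 1);

        /* Unlike the uncached stores (INVALIDATE is a no-op in the _cached
         * instantiation above), dirty tracking here is explicit. */
        address_space_cache_invalidate(&cache, 8, 4);

        address_space_cache_destroy(&cache);
    }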
+ 13 - 0
hw/block/pflash_cfi01.c

@@ -707,6 +707,19 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
     int num_devices;
     Error *local_err = NULL;
 
+    if (pfl->sector_len == 0) {
+        error_setg(errp, "attribute \"sector-length\" not specified or zero.");
+        return;
+    }
+    if (pfl->nb_blocs == 0) {
+        error_setg(errp, "attribute \"num-blocks\" not specified or zero.");
+        return;
+    }
+    if (pfl->name == NULL) {
+        error_setg(errp, "attribute \"name\" not specified.");
+        return;
+    }
+
     total_len = pfl->sector_len * pfl->nb_blocs;
 
     /* These are only used to expose the parameters of each device

+ 13 - 0
hw/block/pflash_cfi02.c

@@ -600,6 +600,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp)
     int ret;
     Error *local_err = NULL;
 
+    if (pfl->sector_len == 0) {
+        error_setg(errp, "attribute \"sector-length\" not specified or zero.");
+        return;
+    }
+    if (pfl->nb_blocs == 0) {
+        error_setg(errp, "attribute \"num-blocks\" not specified or zero.");
+        return;
+    }
+    if (pfl->name == NULL) {
+        error_setg(errp, "attribute \"name\" not specified.");
+        return;
+    }
+
     chip_len = pfl->sector_len * pfl->nb_blocs;
     /* XXX: to be fixed */
 #if 0

+ 127 - 15
hw/i386/kvm/clock.c

@@ -36,6 +36,13 @@ typedef struct KVMClockState {
 
     uint64_t clock;
     bool clock_valid;
+
+    /* whether machine type supports reliable KVM_GET_CLOCK */
+    bool mach_use_reliable_get_clock;
+
+    /* whether the 'clock' value was obtained in a host with
+     * reliable KVM_GET_CLOCK */
+    bool clock_is_reliable;
 } KVMClockState;
 
 struct pvclock_vcpu_time_info {
@@ -81,6 +88,60 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s)
     return nsec + time.system_time;
 }
 
+static void kvm_update_clock(KVMClockState *s)
+{
+    struct kvm_clock_data data;
+    int ret;
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
+    if (ret < 0) {
+        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+        abort();
+    }
+    s->clock = data.clock;
+
+    /* If kvm_has_adjust_clock_stable() is false, KVM_GET_CLOCK returns
+     * essentially CLOCK_MONOTONIC plus a guest-specific adjustment.  This
+     * can drift from the TSC-based value that is computed by the guest,
+     * so we need to go through kvmclock_current_nsec().  If
+     * kvm_has_adjust_clock_stable() is true, and the flags contain
+     * KVM_CLOCK_TSC_STABLE, then KVM_GET_CLOCK returns a TSC-based value
+     * and kvmclock_current_nsec() is not necessary.
+     *
+     * Here, however, we need not check KVM_CLOCK_TSC_STABLE.  This is because:
+     *
+     * - if the host has disabled the kvmclock master clock, the guest already
+     *   has protection against time going backwards.  This "safety net" is only
+     *   absent when kvmclock is stable;
+     *
+     * - therefore, we can replace a check like
+     *
+     *       if last KVM_GET_CLOCK was not reliable then
+     *               read from memory
+     *
+     *   with
+     *
+     *       if last KVM_GET_CLOCK was not reliable && masterclock is enabled
+     *               read from memory
+     *
+     * However:
+     *
+     * - if kvm_has_adjust_clock_stable() returns false, the left side is
+     *   always true (KVM_GET_CLOCK is never reliable), and the right side is
+     *   unknown (because we don't have data.flags).  We must assume it's true
+     *   and read from memory.
+     *
+     * - if kvm_has_adjust_clock_stable() returns true, the result of the &&
+     *   is always false (masterclock is enabled iff KVM_GET_CLOCK is reliable)
+     *
+     * So we can just use this instead:
+     *
+     *       if !kvm_has_adjust_clock_stable() then
+     *               read from memory
+     */
+    s->clock_is_reliable = kvm_has_adjust_clock_stable();
+}
+
 static void kvmclock_vm_state_change(void *opaque, int running,
                                      RunState state)
 {
@@ -91,15 +152,21 @@ static void kvmclock_vm_state_change(void *opaque, int running,
 
     if (running) {
         struct kvm_clock_data data = {};
-        uint64_t time_at_migration = kvmclock_current_nsec(s);
-
-        s->clock_valid = false;
 
-        /* We can't rely on the migrated clock value, just discard it */
-        if (time_at_migration) {
-            s->clock = time_at_migration;
+        /*
+         * If the host where s->clock was read did not support reliable
+         * KVM_GET_CLOCK, read kvmclock value from memory.
+         */
+        if (!s->clock_is_reliable) {
+            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
+            /* We can't rely on the saved clock value, just discard it */
+            if (pvclock_via_mem) {
+                s->clock = pvclock_via_mem;
+            }
         }
 
+        s->clock_valid = false;
+
         data.clock = s->clock;
         ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
         if (ret < 0) {
@@ -120,8 +187,6 @@ static void kvmclock_vm_state_change(void *opaque, int running,
             }
         }
     } else {
-        struct kvm_clock_data data;
-        int ret;
 
         if (s->clock_valid) {
             return;
@@ -129,13 +194,7 @@ static void kvmclock_vm_state_change(void *opaque, int running,
 
         kvm_synchronize_all_tsc();
 
-        ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
-        if (ret < 0) {
-            fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
-            abort();
-        }
-        s->clock = data.clock;
-
+        kvm_update_clock(s);
         /*
          * If the VM is stopped, declare the clock state valid to
          * avoid re-reading it on next vmsave (which would return
@@ -149,25 +208,78 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
 {
     KVMClockState *s = KVM_CLOCK(dev);
 
+    kvm_update_clock(s);
+
     qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 }
 
+static bool kvmclock_clock_is_reliable_needed(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    return s->mach_use_reliable_get_clock;
+}
+
+static const VMStateDescription kvmclock_reliable_get_clock = {
+    .name = "kvmclock/clock_is_reliable",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = kvmclock_clock_is_reliable_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(clock_is_reliable, KVMClockState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * When migrating, read the clock just before migration,
+ * so that the guest clock counts during the events
+ * between:
+ *
+ *  * vm_stop()
+ *
+ *  * pre_save()
+ *
+ *  This reduces kvmclock difference on migration from 5s
+ *  to 0.1s (when max_downtime == 5s), because sending the
+ *  final pages of memory (which happens between vm_stop()
+ *  and pre_save()) takes max_downtime.
+ */
+static void kvmclock_pre_save(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    kvm_update_clock(s);
+}
+
 static const VMStateDescription kvmclock_vmsd = {
     .name = "kvmclock",
     .version_id = 1,
     .minimum_version_id = 1,
+    .pre_save = kvmclock_pre_save,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(clock, KVMClockState),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &kvmclock_reliable_get_clock,
+        NULL
     }
 };
 
+static Property kvmclock_properties[] = {
+    DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState,
+                      mach_use_reliable_get_clock, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void kvmclock_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = kvmclock_realize;
     dc->vmsd = &kvmclock_vmsd;
+    dc->props = kvmclock_properties;
 }
 
 static const TypeInfo kvmclock_info = {

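The reasoning in kvm_update_clock() turns on kvm_has_adjust_clock_stable(), which is defined elsewhere in the series rather than in this hunk. As an assumption about its shape, it simply asks KVM whether KVM_GET_CLOCK returns the TSC-based value the guest computes:

    /* Assumed shape of the helper; the real definition lives outside
     * this hunk (in kvm-all.c). */
    bool kvm_has_adjust_clock_stable(void)
    {
        int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);

        /* KVM_CLOCK_TSC_STABLE means KVM_GET_CLOCK returns a TSC-based
         * value consistent with what the guest reads via kvmclock. */
        return (ret == KVM_CLOCK_TSC_STABLE);
    }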
+ 10 - 10
hw/i386/multiboot.c

@@ -109,7 +109,7 @@ static uint32_t mb_add_cmdline(MultibootState *s, const char *cmdline)
     hwaddr p = s->offset_cmdlines;
     char *b = (char *)s->mb_buf + p;
 
-    get_opt_value(b, strlen(cmdline) + 1, cmdline);
+    memcpy(b, cmdline, strlen(cmdline) + 1);
     s->offset_cmdlines += strlen(b) + 1;
     return s->mb_buf_phys + p;
 }
@@ -287,7 +287,8 @@ int load_multiboot(FWCfgState *fw_cfg,
     mbs.offset_bootloader = mbs.offset_cmdlines + cmdline_len;
 
     if (initrd_filename) {
-        char *next_initrd, not_last;
+        const char *next_initrd;
+        char not_last, tmpbuf[strlen(initrd_filename) + 1];
 
         mbs.offset_mods = mbs.mb_buf_size;
 
@@ -296,25 +297,24 @@ int load_multiboot(FWCfgState *fw_cfg,
             int mb_mod_length;
             uint32_t offs = mbs.mb_buf_size;
 
-            next_initrd = (char *)get_opt_value(NULL, 0, initrd_filename);
+            next_initrd = get_opt_value(tmpbuf, sizeof(tmpbuf), initrd_filename);
             not_last = *next_initrd;
-            *next_initrd = '\0';
             /* if a space comes after the module filename, treat everything
                after that as parameters */
-            hwaddr c = mb_add_cmdline(&mbs, initrd_filename);
-            if ((next_space = strchr(initrd_filename, ' ')))
+            hwaddr c = mb_add_cmdline(&mbs, tmpbuf);
+            if ((next_space = strchr(tmpbuf, ' ')))
                 *next_space = '\0';
-            mb_debug("multiboot loading module: %s\n", initrd_filename);
-            mb_mod_length = get_image_size(initrd_filename);
+            mb_debug("multiboot loading module: %s\n", tmpbuf);
+            mb_mod_length = get_image_size(tmpbuf);
             if (mb_mod_length < 0) {
-                fprintf(stderr, "Failed to open file '%s'\n", initrd_filename);
+                fprintf(stderr, "Failed to open file '%s'\n", tmpbuf);
                 exit(1);
             }
 
             mbs.mb_buf_size = TARGET_PAGE_ALIGN(mb_mod_length + mbs.mb_buf_size);
             mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size);
 
-            load_image(initrd_filename, (unsigned char *)mbs.mb_buf + offs);
+            load_image(tmpbuf, (unsigned char *)mbs.mb_buf + offs);
             mb_add_mod(&mbs, mbs.mb_buf_phys + offs,
                        mbs.mb_buf_phys + offs + mb_mod_length, c);
 

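The bug being fixed: get_opt_value() is an option-list parser, so it stops at an unescaped comma and collapses the ",," escape. That is exactly right for splitting the -initrd module list (hence the tmpbuf copy above), but it silently mangles a kernel command line containing literal commas, which must be copied verbatim. A standalone illustration, where the get_opt_value() body is a simplified stand-in modeled on util/qemu-option.c rather than taken from this patch:

    #include <stdio.h>
    #include <string.h>

    /* Simplified stand-in for QEMU's get_opt_value(). */
    static const char *get_opt_value(char *buf, int buf_size, const char *p)
    {
        char *q = buf;

        while (*p != '\0') {
            if (*p == ',') {
                if (p[1] != ',') {
                    break;              /* unescaped comma ends the value */
                }
                p++;                    /* ",," unescapes to a single "," */
            }
            if ((q - buf) < buf_size - 1) {
                *q++ = *p;
            }
            p++;
        }
        *q = '\0';
        return p;                       /* points at the delimiter or NUL */
    }

    int main(void)
    {
        const char *cmdline = "console=ttyS0 opts=a,,b";
        char buf[64];

        get_opt_value(buf, sizeof(buf), cmdline);
        printf("escaped copy : %s\n", buf);   /* "console=ttyS0 opts=a,b" */

        memcpy(buf, cmdline, strlen(cmdline) + 1);
        printf("verbatim copy: %s\n", buf);   /* unchanged */
        return 0;
    }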
+ 62 - 6
hw/i386/pc.c

@@ -400,13 +400,13 @@ static void pc_cmos_init_late(void *opaque)
     int i, trans;
 
     val = 0;
-    if (ide_get_geometry(arg->idebus[0], 0,
-                         &cylinders, &heads, &sectors) >= 0) {
+    if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 0,
+                                           &cylinders, &heads, &sectors) >= 0) {
         cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors);
         val |= 0xf0;
     }
-    if (ide_get_geometry(arg->idebus[0], 1,
-                         &cylinders, &heads, &sectors) >= 0) {
+    if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 1,
+                                           &cylinders, &heads, &sectors) >= 0) {
         cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors);
         val |= 0x0f;
     }
@@ -418,7 +418,8 @@ static void pc_cmos_init_late(void *opaque)
            geometry.  It is always such that: 1 <= sects <= 63, 1
            <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
            geometry can be different if a translation is done. */
-        if (ide_get_geometry(arg->idebus[i / 2], i % 2,
+        if (arg->idebus[i / 2] &&
+            ide_get_geometry(arg->idebus[i / 2], i % 2,
                              &cylinders, &heads, &sectors) >= 0) {
             trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1;
             assert((trans & ~3) == 0);
@@ -1535,6 +1536,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
                           ISADevice **rtc_state,
                           bool create_fdctrl,
                           bool no_vmport,
+                          bool has_pit,
                           uint32_t hpet_irqs)
 {
     int i;
@@ -1588,7 +1590,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
 
     qemu_register_boot_set(pc_boot_set, *rtc_state);
 
-    if (!xen_enabled()) {
+    if (!xen_enabled() && has_pit) {
         if (kvm_pit_in_kernel()) {
             pit = kvm_pit_init(isa_bus, 0x40);
         } else {
@@ -2158,6 +2160,48 @@ static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
     pcms->acpi_nvdimm_state.is_enabled = value;
 }
 
+static bool pc_machine_get_smbus(Object *obj, Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+
+    return pcms->smbus;
+}
+
+static void pc_machine_set_smbus(Object *obj, bool value, Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+
+    pcms->smbus = value;
+}
+
+static bool pc_machine_get_sata(Object *obj, Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+
+    return pcms->sata;
+}
+
+static void pc_machine_set_sata(Object *obj, bool value, Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+
+    pcms->sata = value;
+}
+
+static bool pc_machine_get_pit(Object *obj, Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+
+    return pcms->pit;
+}
+
+static void pc_machine_set_pit(Object *obj, bool value, Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+
+    pcms->pit = value;
+}
+
 static void pc_machine_initfn(Object *obj)
 {
     PCMachineState *pcms = PC_MACHINE(obj);
@@ -2169,6 +2213,9 @@ static void pc_machine_initfn(Object *obj)
     pcms->acpi_nvdimm_state.is_enabled = false;
     /* acpi build is enabled by default if machine supports it */
     pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
+    pcms->smbus = true;
+    pcms->sata = true;
+    pcms->pit = true;
 }
 
 static void pc_machine_reset(void)
@@ -2329,6 +2376,15 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
 
     object_class_property_add_bool(oc, PC_MACHINE_NVDIMM,
         pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort);
+
+    object_class_property_add_bool(oc, PC_MACHINE_SMBUS,
+        pc_machine_get_smbus, pc_machine_set_smbus, &error_abort);
+
+    object_class_property_add_bool(oc, PC_MACHINE_SATA,
+        pc_machine_get_sata, pc_machine_set_sata, &error_abort);
+
+    object_class_property_add_bool(oc, PC_MACHINE_PIT,
+        pc_machine_get_pit, pc_machine_set_pit, &error_abort);
 }
 
 static const TypeInfo pc_machine_info = {

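The three new booleans are registered as QOM machine properties, so they are reachable through the generic property machinery. Assuming the PC_MACHINE_SMBUS/PC_MACHINE_SATA/PC_MACHINE_PIT macros expand to the plain strings "smbus", "sata" and "pit" (their definitions live in pc.h, outside this hunk), they can be flipped like any other machine option:

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "hw/i386/pc.h"

    /* Hypothetical helper: disable the Q35 on-board SATA controller via
     * the new machine property (name assumed to be "sata"). */
    static void disable_builtin_sata(PCMachineState *pcms)
    {
        object_property_set_bool(OBJECT(pcms), false, "sata", &error_fatal);
    }

    /* Equivalent command line, under the same naming assumption:
     *
     *     qemu-system-x86_64 -machine q35,sata=off,smbus=off,pit=off ...
     */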
+ 1 - 1
hw/i386/pc_piix.c

@@ -235,7 +235,7 @@ static void pc_init1(MachineState *machine,
 
     /* init basic PC hardware */
     pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, true,
-                         (pcms->vmport != ON_OFF_AUTO_ON), 0x4);
+                         (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit, 0x4);
 
     pc_nic_init(isa_bus, pci_bus);
 

+ 23 - 16
hw/i386/pc_q35.c

@@ -227,32 +227,39 @@ static void pc_q35_init(MachineState *machine)
 
     /* init basic PC hardware */
     pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, !mc->no_floppy,
-                         (pcms->vmport != ON_OFF_AUTO_ON), 0xff0104);
+                         (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit,
+                         0xff0104);
 
     /* connect pm stuff to lpc */
     ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pcms));
 
-    /* ahci and SATA device, for q35 1 ahci controller is built-in */
-    ahci = pci_create_simple_multifunction(host_bus,
-                                           PCI_DEVFN(ICH9_SATA1_DEV,
-                                                     ICH9_SATA1_FUNC),
-                                           true, "ich9-ahci");
-    idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0");
-    idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1");
-    g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports);
-    ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports);
-    ahci_ide_create_devs(ahci, hd);
+    if (pcms->sata) {
+        /* ahci and SATA device, for q35 1 ahci controller is built-in */
+        ahci = pci_create_simple_multifunction(host_bus,
+                                               PCI_DEVFN(ICH9_SATA1_DEV,
+                                                         ICH9_SATA1_FUNC),
+                                               true, "ich9-ahci");
+        idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0");
+        idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1");
+        g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports);
+        ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports);
+        ahci_ide_create_devs(ahci, hd);
+    } else {
+        idebus[0] = idebus[1] = NULL;
+    }
 
     if (machine_usb(machine)) {
         /* Should we create 6 UHCI according to ich9 spec? */
         ehci_create_ich9_with_companions(host_bus, 0x1d);
     }
 
-    /* TODO: Populate SPD eeprom data.  */
-    smbus_eeprom_init(ich9_smb_init(host_bus,
-                                    PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC),
-                                    0xb100),
-                      8, NULL, 0);
+    if (pcms->smbus) {
+        /* TODO: Populate SPD eeprom data.  */
+        smbus_eeprom_init(ich9_smb_init(host_bus,
+                                        PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC),
+                                        0xb100),
+                          8, NULL, 0);
+    }
 
     pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state);
 

+ 8 - 1
hw/scsi/scsi-disk.c

@@ -2157,6 +2157,13 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
         DPRINTF("Write %s(sector %" PRId64 ", count %u)\n",
                 (command & 0xe) == 0xe ? "And Verify " : "",
                 r->req.cmd.lba, len);
+    case VERIFY_10:
+    case VERIFY_12:
+    case VERIFY_16:
+        /* We get here only for BYTCHK == 0x01 and only for scsi-block.
+         * As far as DMA is concerned, we can treat it the same as a write;
+         * scsi_block_do_sgio will send VERIFY commands.
+         */
         if (r->req.cmd.buf[1] & 0xe0) {
             goto illegal_request;
         }
@@ -2712,7 +2719,7 @@ static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
     case WRITE_VERIFY_16:
         /* MMC writing cannot be done via DMA helpers, because it sometimes
          * involves writing beyond the maximum LBA or to negative LBA (lead-in).
-         * We might use scsi_disk_dma_reqops as long as no writing commands are
+         * We might use scsi_block_dma_reqops as long as no writing commands are
          * seen, but performance usually isn't paramount on optical media.  So,
          * just make scsi-block operate the same as scsi-generic for them.
          */

+ 18 - 9
hw/scsi/virtio-scsi.c

@@ -420,6 +420,20 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
     }
 }
 
+static inline void virtio_scsi_acquire(VirtIOSCSI *s)
+{
+    if (s->ctx) {
+        aio_context_acquire(s->ctx);
+    }
+}
+
+static inline void virtio_scsi_release(VirtIOSCSI *s)
+{
+    if (s->ctx) {
+        aio_context_release(s->ctx);
+    }
+}
+
 void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
     VirtIOSCSIReq *req;
@@ -691,10 +705,7 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
         return;
     }
 
-    if (s->dataplane_started) {
-        assert(s->ctx);
-        aio_context_acquire(s->ctx);
-    }
+    virtio_scsi_acquire(s);
 
     req = virtio_scsi_pop_req(s, vs->event_vq);
     if (!req) {
@@ -730,9 +741,7 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
     }
     virtio_scsi_complete_req(req);
 out:
-    if (s->dataplane_started) {
-        aio_context_release(s->ctx);
-    }
+    virtio_scsi_release(s);
 }
 
 void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
@@ -778,9 +787,9 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
         if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
             return;
         }
-        aio_context_acquire(s->ctx);
+        virtio_scsi_acquire(s);
         blk_set_aio_context(sd->conf.blk, s->ctx);
-        aio_context_release(s->ctx);
+        virtio_scsi_release(s);
 
     }
 

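The wrappers fold the "is a dataplane AioContext attached?" check into one place: s->ctx is NULL when the device runs entirely under the big QEMU lock, in which case no acquire/release is needed. A sketch of the resulting calling pattern, as a hypothetical function inside virtio-scsi.c (not from the patch):

    /* Hypothetical caller: code that may run with or without an iothread
     * can now bracket shared-state access unconditionally. */
    static void example_complete(VirtIOSCSI *s, VirtIOSCSIReq *req)
    {
        virtio_scsi_acquire(s);          /* no-op when s->ctx == NULL */
        virtio_scsi_complete_req(req);
        virtio_scsi_release(s);
    }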
+ 9 - 0
hw/watchdog/wdt_i6300esb.c

@@ -428,6 +428,14 @@ static void i6300esb_realize(PCIDevice *dev, Error **errp)
     /* qemu_register_coalesced_mmio (addr, 0x10); ? */
 }
 
+static void i6300esb_exit(PCIDevice *dev)
+{
+    I6300State *d = WATCHDOG_I6300ESB_DEVICE(dev);
+
+    timer_del(d->timer);
+    timer_free(d->timer);
+}
+
 static WatchdogTimerModel model = {
     .wdt_name = "i6300esb",
     .wdt_description = "Intel 6300ESB",
@@ -441,6 +449,7 @@ static void i6300esb_class_init(ObjectClass *klass, void *data)
     k->config_read = i6300esb_config_read;
     k->config_write = i6300esb_config_write;
     k->realize = i6300esb_realize;
+    k->exit = i6300esb_exit;
     k->vendor_id = PCI_VENDOR_ID_INTEL;
     k->device_id = PCI_DEVICE_ID_INTEL_ESB_9;
     k->class_id = PCI_CLASS_SYSTEM_OTHER;

+ 2 - 2
include/block/aio.h

@@ -195,8 +195,8 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
  * aio_notify: Force processing of pending events.
  *
  * Similar to signaling a condition variable, aio_notify forces
- * aio_wait to exit, so that the next call will re-examine pending events.
- * The caller of aio_notify will usually call aio_wait again very soon,
+ * aio_poll to exit, so that the next call will re-examine pending events.
+ * The caller of aio_notify will usually call aio_poll again very soon,
  * or go through another iteration of the GLib main loop.  Hence, aio_notify
  * also has the side effect of recalculating the sets of file descriptors
  * that the main loop waits for.

+ 23 - 0
include/exec/cpu-all.h

@@ -186,6 +186,29 @@ void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                             MemTxAttrs attrs, MemTxResult *result);
 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                             MemTxAttrs attrs, MemTxResult *result);
+
+uint32_t lduw_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+uint32_t ldl_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+uint64_t ldq_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+void stl_phys_notdirty_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
+void stw_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
+void stl_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
+void stq_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val);
+
+uint32_t address_space_lduw_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_ldl_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint64_t address_space_ldq_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stl_notdirty_cached(MemoryRegionCache *cache, hwaddr addr,
+                            uint32_t val, MemTxAttrs attrs, MemTxResult *result);
+void address_space_stw_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stl_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stq_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
 #endif
 
 /* page related stuff */

+ 0 - 15
include/exec/cpu-common.h

@@ -94,21 +94,6 @@ bool cpu_physical_memory_is_io(hwaddr phys_addr);
  */
 void qemu_flush_coalesced_mmio_buffer(void);
 
-uint32_t ldub_phys(AddressSpace *as, hwaddr addr);
-uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr);
-uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr);
-uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr);
-uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr);
-uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr);
-uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr);
-void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val);
-void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val);
-void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val);
-void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val);
-void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val);
-void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val);
-void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val);
-
 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                    const uint8_t *buf, int len);
 void cpu_flush_icache_range(hwaddr start, int len);

+ 166 - 0
include/exec/memory.h

@@ -1404,6 +1404,140 @@ void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                             MemTxAttrs attrs, MemTxResult *result);
 
+uint32_t ldub_phys(AddressSpace *as, hwaddr addr);
+uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr);
+uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr);
+uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr);
+uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr);
+uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr);
+uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr);
+void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val);
+void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val);
+void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val);
+void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val);
+void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val);
+void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val);
+void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val);
+
+struct MemoryRegionCache {
+    hwaddr xlat;
+    void *ptr;
+    hwaddr len;
+    MemoryRegion *mr;
+    bool is_write;
+};
+
+/* address_space_cache_init: prepare for repeated access to a physical
+ * memory region
+ *
+ * @cache: #MemoryRegionCache to be filled
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @len: length of buffer
+ * @is_write: indicates the transfer direction
+ *
+ * Will only work with RAM, and may map a subset of the requested range by
+ * returning a value that is less than @len.  On failure, return a negative
+ * errno value.
+ *
+ * Because it only works with RAM, this function can be used for
+ * read-modify-write operations.  In this case, is_write should be %true.
+ *
+ * Note that addresses passed to the address_space_*_cached functions
+ * are relative to @addr.
+ */
+int64_t address_space_cache_init(MemoryRegionCache *cache,
+                                 AddressSpace *as,
+                                 hwaddr addr,
+                                 hwaddr len,
+                                 bool is_write);
+
+/**
+ * address_space_cache_invalidate: complete a write to a #MemoryRegionCache
+ *
+ * @cache: The #MemoryRegionCache to operate on.
+ * @addr: The first physical address that was written, relative to the
+ * address that was passed to @address_space_cache_init.
+ * @access_len: The number of bytes that were written starting at @addr.
+ */
+void address_space_cache_invalidate(MemoryRegionCache *cache,
+                                    hwaddr addr,
+                                    hwaddr access_len);
+
+/**
+ * address_space_cache_destroy: free a #MemoryRegionCache
+ *
+ * @cache: The #MemoryRegionCache whose memory should be released.
+ */
+void address_space_cache_destroy(MemoryRegionCache *cache);
+
+/* address_space_ld*_cached: load from a cached #MemoryRegion
+ * address_space_st*_cached: store into a cached #MemoryRegion
+ *
+ * These functions perform a load or store of the byte, word,
+ * longword or quad to the specified address.  The address is
+ * a physical address in the AddressSpace, but it must lie within
+ * a #MemoryRegion that was mapped with address_space_cache_init.
+ *
+ * The _le suffixed functions treat the data as little endian;
+ * _be indicates big endian; no suffix indicates "same endianness
+ * as guest CPU".
+ *
+ * The "guest CPU endianness" accessors are deprecated for use outside
+ * target-* code; devices should be CPU-agnostic and use either the LE
+ * or the BE accessors.
+ *
+ * @cache: previously initialized #MemoryRegionCache to be accessed
+ * @addr: address within the address space
+ * @val: data value, for stores
+ * @attrs: memory transaction attributes
+ * @result: location to write the success/failure of the transaction;
+ *   if NULL, this information is discarded
+ */
+uint32_t address_space_ldub_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_lduw_le_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_lduw_be_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_ldl_le_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_ldl_be_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint64_t address_space_ldq_le_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint64_t address_space_ldq_be_cached(MemoryRegionCache *cache, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stb_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stw_le_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stw_be_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stl_le_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stl_be_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stq_le_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stq_be_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+
+uint32_t ldub_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+uint32_t lduw_le_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+uint32_t lduw_be_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+uint32_t ldl_le_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+uint32_t ldl_be_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+uint64_t ldq_le_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+uint64_t ldq_be_phys_cached(MemoryRegionCache *cache, hwaddr addr);
+void stb_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
+void stw_le_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
+void stw_be_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
+void stl_le_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
+void stl_be_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint32_t val);
+void stq_le_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val);
+void stq_be_phys_cached(MemoryRegionCache *cache, hwaddr addr, uint64_t val);
+
 /* address_space_translate: translate an address range into an address space
  * into a MemoryRegion and an address range into that section.  Should be
  * called from an RCU critical section, to avoid that the last reference
@@ -1529,6 +1663,38 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
     return result;
 }
 
+/**
+ * address_space_read_cached: read from a cached RAM region
+ *
+ * @cache: Cached region to be addressed
+ * @addr: address relative to the base of the RAM region
+ * @buf: buffer that receives the data transferred
+ * @len: length of the data transferred
+ */
+static inline void
+address_space_read_cached(MemoryRegionCache *cache, hwaddr addr,
+                          void *buf, int len)
+{
+    assert(addr < cache->len && len <= cache->len - addr);
+    memcpy(buf, cache->ptr + addr, len);
+}
+
+/**
+ * address_space_write_cached: write to a cached RAM region
+ *
+ * @cache: Cached region to be addressed
+ * @addr: address relative to the base of the RAM region
+ * @buf: buffer holding the data to be written
+ * @len: length of the data transferred
+ */
+static inline void
+address_space_write_cached(MemoryRegionCache *cache, hwaddr addr,
+                           void *buf, int len)
+{
+    assert(addr < cache->len && len <= cache->len - addr);
+    memcpy(cache->ptr + addr, buf, len);
+}
+
 #endif
 
 #endif

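A minimal usage sketch of the MemoryRegionCache API declared above; everything outside the address_space_*/*_phys_cached calls (the function, the 16-byte layout, the field offsets) is an assumption for illustration:

    /* Cache a small guest-RAM structure, read one field, update another,
     * then flush the write and release the mapping. */
    static void update_counter(AddressSpace *as, hwaddr base)
    {
        MemoryRegionCache cache;
        int64_t mapped;
        uint64_t count;

        /* Fails with a negative errno unless the range is RAM. */
        mapped = address_space_cache_init(&cache, as, base, 16, true);
        if (mapped < 0) {
            return;
        }
        if (mapped < 16) {
            /* Only part of the range was mapped. */
            address_space_cache_destroy(&cache);
            return;
        }

        /* Offsets passed to the *_cached accessors are relative to base. */
        count = address_space_ldq_le_cached(&cache, 0,
                                            MEMTXATTRS_UNSPECIFIED, NULL);
        stq_le_phys_cached(&cache, 8, count + 1);

        /* Flush the write before tearing the cache down. */
        address_space_cache_invalidate(&cache, 8, 8);
        address_space_cache_destroy(&cache);
    }
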
+ 12 - 0
include/hw/i386/pc.h

@@ -63,6 +63,9 @@ struct PCMachineState {
     AcpiNVDIMMState acpi_nvdimm_state;
 
     bool acpi_build_enabled;
+    bool smbus;
+    bool sata;
+    bool pit;
 
     /* RAM information (sizes, addresses, configuration): */
     ram_addr_t below_4g_mem_size, above_4g_mem_size;
@@ -88,6 +91,9 @@ struct PCMachineState {
 #define PC_MACHINE_VMPORT           "vmport"
 #define PC_MACHINE_SMM              "smm"
 #define PC_MACHINE_NVDIMM           "nvdimm"
+#define PC_MACHINE_SMBUS            "smbus"
+#define PC_MACHINE_SATA             "sata"
+#define PC_MACHINE_PIT              "pit"
 
 /**
  * PCMachineClass:
@@ -260,6 +266,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
                           ISADevice **rtc_state,
                           bool create_fdctrl,
                           bool no_vmport,
+                          bool has_pit,
                           uint32_t hpet_irqs);
 void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd);
 void pc_cmos_init(PCMachineState *pcms,
@@ -371,6 +378,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
 #define PC_COMPAT_2_7 \
     HW_COMPAT_2_7 \
+    {\
+        .driver   = "kvmclock",\
+        .property = "x-mach-use-reliable-get-clock",\
+        .value    = "off",\
+    },\
     {\
         .driver   = TYPE_X86_CPU,\
         .property = "l3-cache",\

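The three new properties hang off the machine object like the existing vmport/smm/nvdimm switches, so the corresponding devices can be omitted from the command line; an illustrative invocation (option spelling assumed from the property names):

    qemu-system-x86_64 -machine q35,smbus=off,sata=off,pit=off
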
+ 2 - 2
include/qemu/main-loop.h

@@ -238,7 +238,7 @@ bool qemu_mutex_iothread_locked(void);
  * qemu_mutex_lock_iothread: Lock the main loop mutex.
  *
  * This function locks the main loop mutex.  The mutex is taken by
- * qemu_init_main_loop and always taken except while waiting on
+ * main() in vl.c and always taken except while waiting on
  * external events (such as with select).  The mutex should be taken
  * by threads other than the main loop thread when calling
  * qemu_bh_new(), qemu_set_fd_handler() and basically all other
@@ -253,7 +253,7 @@ void qemu_mutex_lock_iothread(void);
  * qemu_mutex_unlock_iothread: Unlock the main loop mutex.
  *
  * This function unlocks the main loop mutex.  The mutex is taken by
- * qemu_init_main_loop and always taken except while waiting on
+ * main() in vl.c and always taken except while waiting on
  * external events (such as with select).  The mutex should be unlocked
  * as soon as possible by threads other than the main loop thread,
  * because it prevents the main loop from processing callbacks,

+ 1 - 1
include/qemu/timer.h

@@ -133,7 +133,7 @@ bool qemu_clock_has_timers(QEMUClockType type);
  * @type: the clock type
  *
  * Determines whether a clock's default timer list
- * has an expired clock.
+ * has an expired timer.
  *
  * Returns: true if the clock's default timer list has
  * an expired timer

+ 1 - 0
include/qemu/typedefs.h

@@ -45,6 +45,7 @@ typedef struct MachineState MachineState;
 typedef struct MemoryListener MemoryListener;
 typedef struct MemoryMappingList MemoryMappingList;
 typedef struct MemoryRegion MemoryRegion;
+typedef struct MemoryRegionCache MemoryRegionCache;
 typedef struct MemoryRegionSection MemoryRegionSection;
 typedef struct MigrationIncomingState MigrationIncomingState;
 typedef struct MigrationParams MigrationParams;

+ 1 - 0
include/standard-headers/linux/input.h

@@ -245,6 +245,7 @@ struct input_mask {
 #define BUS_SPI			0x1C
 #define BUS_RMI			0x1D
 #define BUS_CEC			0x1E
+#define BUS_INTEL_ISHTP		0x1F
 
 /*
  * MT_TOOL types

+ 14 - 1
include/standard-headers/linux/pci_regs.h

@@ -612,6 +612,8 @@
  */
 #define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
 #define  PCI_EXP_DEVCAP2_ARI		0x00000020 /* Alternative Routing-ID */
+#define  PCI_EXP_DEVCAP2_ATOMIC_ROUTE	0x00000040 /* Atomic Op routing */
+#define PCI_EXP_DEVCAP2_ATOMIC_COMP64	0x00000100 /* Atomic 64-bit compare */
 #define  PCI_EXP_DEVCAP2_LTR		0x00000800 /* Latency tolerance reporting */
 #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
@@ -619,6 +621,7 @@
 #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_ARI		0x0020	/* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2_ATOMIC_REQ	0x0040	/* Set Atomic requests */
 #define  PCI_EXP_DEVCTL2_IDO_REQ_EN	0x0100	/* Allow IDO for requests */
 #define  PCI_EXP_DEVCTL2_IDO_CMP_EN	0x0200	/* Allow IDO for completions */
 #define  PCI_EXP_DEVCTL2_LTR_EN		0x0400	/* Enable LTR mechanism */
@@ -671,7 +674,8 @@
 #define PCI_EXT_CAP_ID_PMUX	0x1A	/* Protocol Multiplexing */
 #define PCI_EXT_CAP_ID_PASID	0x1B	/* Process Address Space ID */
 #define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
-#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_DPC
+#define PCI_EXT_CAP_ID_PTM	0x1F	/* Precision Time Measurement */
+#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PTM
 
 #define PCI_EXT_CAP_DSN_SIZEOF	12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
@@ -964,4 +968,13 @@
 
 #define PCI_EXP_DPC_SOURCE_ID		10	/* DPC Source Identifier */
 
+/* Precision Time Measurement */
+#define PCI_PTM_CAP			0x04	    /* PTM Capability */
+#define  PCI_PTM_CAP_REQ		0x00000001  /* Requester capable */
+#define  PCI_PTM_CAP_ROOT		0x00000004  /* Root capable */
+#define  PCI_PTM_GRANULARITY_MASK	0x0000FF00  /* Clock granularity */
+#define PCI_PTM_CTRL			0x08	    /* PTM Control */
+#define  PCI_PTM_CTRL_ENABLE		0x00000001  /* PTM enable */
+#define  PCI_PTM_CTRL_ROOT		0x00000002  /* Root select */
+
 #endif /* LINUX_PCI_REGS_H */

+ 7 - 0
linux-headers/asm-arm/kvm.h

@@ -84,6 +84,13 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE		0x1000
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
+/* Supported VGICv3 address types  */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST	2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+
+#define KVM_VGIC_V3_DIST_SIZE		SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_PSCI_0_2		1 /* CPU uses PSCI v0.2 */
 

+ 3 - 0
linux-headers/asm-x86/unistd_32.h

@@ -377,5 +377,8 @@
 #define __NR_copy_file_range 377
 #define __NR_preadv2 378
 #define __NR_pwritev2 379
+#define __NR_pkey_mprotect 380
+#define __NR_pkey_alloc 381
+#define __NR_pkey_free 382
 
 #endif /* _ASM_X86_UNISTD_32_H */

+ 3 - 0
linux-headers/asm-x86/unistd_64.h

@@ -330,5 +330,8 @@
 #define __NR_copy_file_range 326
 #define __NR_preadv2 327
 #define __NR_pwritev2 328
+#define __NR_pkey_mprotect 329
+#define __NR_pkey_alloc 330
+#define __NR_pkey_free 331
 
 #endif /* _ASM_X86_UNISTD_64_H */

+ 3 - 0
linux-headers/asm-x86/unistd_x32.h

@@ -283,6 +283,9 @@
 #define __NR_membarrier (__X32_SYSCALL_BIT + 324)
 #define __NR_mlock2 (__X32_SYSCALL_BIT + 325)
 #define __NR_copy_file_range (__X32_SYSCALL_BIT + 326)
+#define __NR_pkey_mprotect (__X32_SYSCALL_BIT + 329)
+#define __NR_pkey_alloc (__X32_SYSCALL_BIT + 330)
+#define __NR_pkey_free (__X32_SYSCALL_BIT + 331)
 #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
 #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
 #define __NR_ioctl (__X32_SYSCALL_BIT + 514)

+ 7 - 0
linux-headers/linux/kvm.h

@@ -972,12 +972,19 @@ struct kvm_irqfd {
 	__u8  pad[16];
 };
 
+/* For KVM_CAP_ADJUST_CLOCK */
+
+/* Do not use 1; KVM_CHECK_EXTENSION returned it before we had flags.  */
+#define KVM_CLOCK_TSC_STABLE		2
+
 struct kvm_clock_data {
 	__u64 clock;
 	__u32 flags;
 	__u32 pad[9];
 };
 
+/* For KVM_CAP_SW_TLB */
+
 #define KVM_MMU_FSL_BOOKE_NOHV		0
 #define KVM_MMU_FSL_BOOKE_HV		1
 

+ 709 - 0
memory_ldst.inc.c

@@ -0,0 +1,709 @@
+/*
+ *  Physical memory access templates
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *  Copyright (c) 2015 Linaro, Inc.
+ *  Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* warning: addr must be aligned */
+static inline uint32_t glue(address_space_ldl_internal, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result,
+    enum device_endian endian)
+{
+    uint8_t *ptr;
+    uint64_t val;
+    MemoryRegion *mr;
+    hwaddr l = 4;
+    hwaddr addr1;
+    MemTxResult r;
+    bool release_lock = false;
+
+    RCU_READ_LOCK();
+    mr = TRANSLATE(addr, &addr1, &l, false);
+    if (l < 4 || !IS_DIRECT(mr, false)) {
+        release_lock |= prepare_mmio_access(mr);
+
+        /* I/O case */
+        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap32(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap32(val);
+        }
+#endif
+    } else {
+        /* RAM case */
+        ptr = MAP_RAM(mr, addr1);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            val = ldl_le_p(ptr);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            val = ldl_be_p(ptr);
+            break;
+        default:
+            val = ldl_p(ptr);
+            break;
+        }
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
+    }
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+    RCU_READ_UNLOCK();
+    return val;
+}
+
+uint32_t glue(address_space_ldl, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    return glue(address_space_ldl_internal, SUFFIX)(ARG1, addr, attrs, result,
+                                                    DEVICE_NATIVE_ENDIAN);
+}
+
+uint32_t glue(address_space_ldl_le, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    return glue(address_space_ldl_internal, SUFFIX)(ARG1, addr, attrs, result,
+                                                    DEVICE_LITTLE_ENDIAN);
+}
+
+uint32_t glue(address_space_ldl_be, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    return glue(address_space_ldl_internal, SUFFIX)(ARG1, addr, attrs, result,
+                                                    DEVICE_BIG_ENDIAN);
+}
+
+uint32_t glue(ldl_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_ldl, SUFFIX)(ARG1, addr,
+                                           MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+uint32_t glue(ldl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_ldl_le, SUFFIX)(ARG1, addr,
+                                              MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+uint32_t glue(ldl_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_ldl_be, SUFFIX)(ARG1, addr,
+                                              MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+/* warning: addr must be aligned */
+static inline uint64_t glue(address_space_ldq_internal, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result,
+    enum device_endian endian)
+{
+    uint8_t *ptr;
+    uint64_t val;
+    MemoryRegion *mr;
+    hwaddr l = 8;
+    hwaddr addr1;
+    MemTxResult r;
+    bool release_lock = false;
+
+    RCU_READ_LOCK();
+    mr = TRANSLATE(addr, &addr1, &l, false);
+    if (l < 8 || !IS_DIRECT(mr, false)) {
+        release_lock |= prepare_mmio_access(mr);
+
+        /* I/O case */
+        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap64(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap64(val);
+        }
+#endif
+    } else {
+        /* RAM case */
+        ptr = MAP_RAM(mr, addr1);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            val = ldq_le_p(ptr);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            val = ldq_be_p(ptr);
+            break;
+        default:
+            val = ldq_p(ptr);
+            break;
+        }
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
+    }
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+    RCU_READ_UNLOCK();
+    return val;
+}
+
+uint64_t glue(address_space_ldq, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    return glue(address_space_ldq_internal, SUFFIX)(ARG1, addr, attrs, result,
+                                                    DEVICE_NATIVE_ENDIAN);
+}
+
+uint64_t glue(address_space_ldq_le, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    return glue(address_space_ldq_internal, SUFFIX)(ARG1, addr, attrs, result,
+                                                    DEVICE_LITTLE_ENDIAN);
+}
+
+uint64_t glue(address_space_ldq_be, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    return glue(address_space_ldq_internal, SUFFIX)(ARG1, addr, attrs, result,
+                                                    DEVICE_BIG_ENDIAN);
+}
+
+uint64_t glue(ldq_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_ldq, SUFFIX)(ARG1, addr,
+                                           MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+uint64_t glue(ldq_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_ldq_le, SUFFIX)(ARG1, addr,
+                                              MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+uint64_t glue(ldq_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_ldq_be, SUFFIX)(ARG1, addr,
+                                              MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+uint32_t glue(address_space_ldub, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    uint8_t *ptr;
+    uint64_t val;
+    MemoryRegion *mr;
+    hwaddr l = 1;
+    hwaddr addr1;
+    MemTxResult r;
+    bool release_lock = false;
+
+    RCU_READ_LOCK();
+    mr = TRANSLATE(addr, &addr1, &l, false);
+    if (!IS_DIRECT(mr, false)) {
+        release_lock |= prepare_mmio_access(mr);
+
+        /* I/O case */
+        r = memory_region_dispatch_read(mr, addr1, &val, 1, attrs);
+    } else {
+        /* RAM case */
+        ptr = MAP_RAM(mr, addr1);
+        val = ldub_p(ptr);
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
+    }
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+    RCU_READ_UNLOCK();
+    return val;
+}
+
+uint32_t glue(ldub_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_ldub, SUFFIX)(ARG1, addr,
+                                            MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+/* warning: addr must be aligned */
+static inline uint32_t glue(address_space_lduw_internal, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result,
+    enum device_endian endian)
+{
+    uint8_t *ptr;
+    uint64_t val;
+    MemoryRegion *mr;
+    hwaddr l = 2;
+    hwaddr addr1;
+    MemTxResult r;
+    bool release_lock = false;
+
+    RCU_READ_LOCK();
+    mr = TRANSLATE(addr, &addr1, &l, false);
+    if (l < 2 || !IS_DIRECT(mr, false)) {
+        release_lock |= prepare_mmio_access(mr);
+
+        /* I/O case */
+        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap16(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap16(val);
+        }
+#endif
+    } else {
+        /* RAM case */
+        ptr = MAP_RAM(mr, addr1);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            val = lduw_le_p(ptr);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            val = lduw_be_p(ptr);
+            break;
+        default:
+            val = lduw_p(ptr);
+            break;
+        }
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
+    }
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+    RCU_READ_UNLOCK();
+    return val;
+}
+
+uint32_t glue(address_space_lduw, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    return glue(address_space_lduw_internal, SUFFIX)(ARG1, addr, attrs, result,
+                                                     DEVICE_NATIVE_ENDIAN);
+}
+
+uint32_t glue(address_space_lduw_le, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    return glue(address_space_lduw_internal, SUFFIX)(ARG1, addr, attrs, result,
+                                                     DEVICE_LITTLE_ENDIAN);
+}
+
+uint32_t glue(address_space_lduw_be, SUFFIX)(ARG1_DECL,
+    hwaddr addr, MemTxAttrs attrs, MemTxResult *result)
+{
+    return glue(address_space_lduw_internal, SUFFIX)(ARG1, addr, attrs, result,
+                                                     DEVICE_BIG_ENDIAN);
+}
+
+uint32_t glue(lduw_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_lduw, SUFFIX)(ARG1, addr,
+                                            MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+uint32_t glue(lduw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_lduw_le, SUFFIX)(ARG1, addr,
+                                               MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+uint32_t glue(lduw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr)
+{
+    return glue(address_space_lduw_be, SUFFIX)(ARG1, addr,
+                                               MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+/* warning: addr must be aligned. The RAM page is not marked as dirty
+   and the code inside is not invalidated. It is useful if the dirty
+   bits are used to track modified PTEs */
+void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    uint8_t *ptr;
+    MemoryRegion *mr;
+    hwaddr l = 4;
+    hwaddr addr1;
+    MemTxResult r;
+    uint8_t dirty_log_mask;
+    bool release_lock = false;
+
+    RCU_READ_LOCK();
+    mr = TRANSLATE(addr, &addr1, &l, true);
+    if (l < 4 || !IS_DIRECT(mr, true)) {
+        release_lock |= prepare_mmio_access(mr);
+
+        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
+    } else {
+        ptr = MAP_RAM(mr, addr1);
+        stl_p(ptr, val);
+
+        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
+        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
+        cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
+                                            4, dirty_log_mask);
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
+    }
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+    RCU_READ_UNLOCK();
+}
+
+void glue(stl_phys_notdirty, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
+{
+    glue(address_space_stl_notdirty, SUFFIX)(ARG1, addr, val,
+                                             MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+/* warning: addr must be aligned */
+static inline void glue(address_space_stl_internal, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs,
+    MemTxResult *result, enum device_endian endian)
+{
+    uint8_t *ptr;
+    MemoryRegion *mr;
+    hwaddr l = 4;
+    hwaddr addr1;
+    MemTxResult r;
+    bool release_lock = false;
+
+    RCU_READ_LOCK();
+    mr = TRANSLATE(addr, &addr1, &l, true);
+    if (l < 4 || !IS_DIRECT(mr, true)) {
+        release_lock |= prepare_mmio_access(mr);
+
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap32(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap32(val);
+        }
+#endif
+        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
+    } else {
+        /* RAM case */
+        ptr = MAP_RAM(mr, addr1);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            stl_le_p(ptr, val);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            stl_be_p(ptr, val);
+            break;
+        default:
+            stl_p(ptr, val);
+            break;
+        }
+        INVALIDATE(mr, addr1, 4);
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
+    }
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+    RCU_READ_UNLOCK();
+}
+
+void glue(address_space_stl, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    glue(address_space_stl_internal, SUFFIX)(ARG1, addr, val, attrs,
+                                             result, DEVICE_NATIVE_ENDIAN);
+}
+
+void glue(address_space_stl_le, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    glue(address_space_stl_internal, SUFFIX)(ARG1, addr, val, attrs,
+                                             result, DEVICE_LITTLE_ENDIAN);
+}
+
+void glue(address_space_stl_be, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    glue(address_space_stl_internal, SUFFIX)(ARG1, addr, val, attrs,
+                                             result, DEVICE_BIG_ENDIAN);
+}
+
+void glue(stl_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
+{
+    glue(address_space_stl, SUFFIX)(ARG1, addr, val,
+                                    MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+void glue(stl_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
+{
+    glue(address_space_stl_le, SUFFIX)(ARG1, addr, val,
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+void glue(stl_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
+{
+    glue(address_space_stl_be, SUFFIX)(ARG1, addr, val,
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+void glue(address_space_stb, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    uint8_t *ptr;
+    MemoryRegion *mr;
+    hwaddr l = 1;
+    hwaddr addr1;
+    MemTxResult r;
+    bool release_lock = false;
+
+    RCU_READ_LOCK();
+    mr = TRANSLATE(addr, &addr1, &l, true);
+    if (!IS_DIRECT(mr, true)) {
+        release_lock |= prepare_mmio_access(mr);
+        r = memory_region_dispatch_write(mr, addr1, val, 1, attrs);
+    } else {
+        /* RAM case */
+        ptr = MAP_RAM(mr, addr1);
+        stb_p(ptr, val);
+        INVALIDATE(mr, addr1, 1);
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
+    }
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+    RCU_READ_UNLOCK();
+}
+
+void glue(stb_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
+{
+    glue(address_space_stb, SUFFIX)(ARG1, addr, val,
+                                    MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+/* warning: addr must be aligned */
+static inline void glue(address_space_stw_internal, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs,
+    MemTxResult *result, enum device_endian endian)
+{
+    uint8_t *ptr;
+    MemoryRegion *mr;
+    hwaddr l = 2;
+    hwaddr addr1;
+    MemTxResult r;
+    bool release_lock = false;
+
+    RCU_READ_LOCK();
+    mr = TRANSLATE(addr, &addr1, &l, true);
+    if (l < 2 || !IS_DIRECT(mr, true)) {
+        release_lock |= prepare_mmio_access(mr);
+
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap16(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap16(val);
+        }
+#endif
+        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
+    } else {
+        /* RAM case */
+        ptr = MAP_RAM(mr, addr1);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            stw_le_p(ptr, val);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            stw_be_p(ptr, val);
+            break;
+        default:
+            stw_p(ptr, val);
+            break;
+        }
+        INVALIDATE(mr, addr1, 2);
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
+    }
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+    RCU_READ_UNLOCK();
+}
+
+void glue(address_space_stw, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    glue(address_space_stw_internal, SUFFIX)(ARG1, addr, val, attrs, result,
+                                             DEVICE_NATIVE_ENDIAN);
+}
+
+void glue(address_space_stw_le, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    glue(address_space_stw_internal, SUFFIX)(ARG1, addr, val, attrs, result,
+                                             DEVICE_LITTLE_ENDIAN);
+}
+
+void glue(address_space_stw_be, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint32_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    glue(address_space_stw_internal, SUFFIX)(ARG1, addr, val, attrs, result,
+                                             DEVICE_BIG_ENDIAN);
+}
+
+void glue(stw_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
+{
+    glue(address_space_stw, SUFFIX)(ARG1, addr, val,
+                                    MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+void glue(stw_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
+{
+    glue(address_space_stw_le, SUFFIX)(ARG1, addr, val,
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+void glue(stw_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint32_t val)
+{
+    glue(address_space_stw_be, SUFFIX)(ARG1, addr, val,
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+static void glue(address_space_stq_internal, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint64_t val, MemTxAttrs attrs,
+    MemTxResult *result, enum device_endian endian)
+{
+    uint8_t *ptr;
+    MemoryRegion *mr;
+    hwaddr l = 8;
+    hwaddr addr1;
+    MemTxResult r;
+    bool release_lock = false;
+
+    RCU_READ_LOCK();
+    mr = TRANSLATE(addr, &addr1, &l, true);
+    if (l < 8 || !IS_DIRECT(mr, true)) {
+        release_lock |= prepare_mmio_access(mr);
+
+#if defined(TARGET_WORDS_BIGENDIAN)
+        if (endian == DEVICE_LITTLE_ENDIAN) {
+            val = bswap64(val);
+        }
+#else
+        if (endian == DEVICE_BIG_ENDIAN) {
+            val = bswap64(val);
+        }
+#endif
+        r = memory_region_dispatch_write(mr, addr1, val, 8, attrs);
+    } else {
+        /* RAM case */
+        ptr = MAP_RAM(mr, addr1);
+        switch (endian) {
+        case DEVICE_LITTLE_ENDIAN:
+            stq_le_p(ptr, val);
+            break;
+        case DEVICE_BIG_ENDIAN:
+            stq_be_p(ptr, val);
+            break;
+        default:
+            stq_p(ptr, val);
+            break;
+        }
+        INVALIDATE(mr, addr1, 8);
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
+    }
+    if (release_lock) {
+        qemu_mutex_unlock_iothread();
+    }
+    RCU_READ_UNLOCK();
+}
+
+void glue(address_space_stq, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    glue(address_space_stq_internal, SUFFIX)(ARG1, addr, val, attrs, result,
+                                             DEVICE_NATIVE_ENDIAN);
+}
+
+void glue(address_space_stq_le, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    glue(address_space_stq_internal, SUFFIX)(ARG1, addr, val, attrs, result,
+                                             DEVICE_LITTLE_ENDIAN);
+}
+
+void glue(address_space_stq_be, SUFFIX)(ARG1_DECL,
+    hwaddr addr, uint64_t val, MemTxAttrs attrs, MemTxResult *result)
+{
+    glue(address_space_stq_internal, SUFFIX)(ARG1, addr, val, attrs, result,
+                                             DEVICE_BIG_ENDIAN);
+}
+
+void glue(stq_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val)
+{
+    glue(address_space_stq, SUFFIX)(ARG1, addr, val,
+                                    MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+void glue(stq_le_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val)
+{
+    glue(address_space_stq_le, SUFFIX)(ARG1, addr, val,
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+void glue(stq_be_phys, SUFFIX)(ARG1_DECL, hwaddr addr, uint64_t val)
+{
+    glue(address_space_stq_be, SUFFIX)(ARG1, addr, val,
+                                       MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+#undef ARG1_DECL
+#undef ARG1
+#undef SUFFIX
+#undef TRANSLATE
+#undef IS_DIRECT
+#undef MAP_RAM
+#undef INVALIDATE
+#undef RCU_READ_LOCK
+#undef RCU_READ_UNLOCK

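The file above is a pure template: a user defines the nine hook macros (the #undef block at the end clears them again) and then includes it. A sketch of an AddressSpace instantiation; the hook bodies shown here are assumptions about what a caller such as exec.c would plug in:

    /* Illustrative instantiation of memory_ldst.inc.c. */
    #define SUFFIX
    #define ARG1                 as
    #define ARG1_DECL            AddressSpace *as
    #define TRANSLATE(addr, ...) address_space_translate(as, addr, __VA_ARGS__)
    #define IS_DIRECT(mr, is_write)  memory_access_is_direct(mr, is_write)
    #define MAP_RAM(mr, ofs)         qemu_map_ram_ptr((mr)->ram_block, ofs)
    #define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
    #define RCU_READ_LOCK()      rcu_read_lock()
    #define RCU_READ_UNLOCK()    rcu_read_unlock()
    #include "memory_ldst.inc.c"
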
+ 16 - 4
qemu-timer.c

@@ -174,7 +174,7 @@ void qemu_clock_enable(QEMUClockType type, bool enabled)
 
 bool timerlist_has_timers(QEMUTimerList *timer_list)
 {
-    return !!timer_list->active_timers;
+    return !!atomic_read(&timer_list->active_timers);
 }
 
 bool qemu_clock_has_timers(QEMUClockType type)
@@ -187,6 +187,10 @@ bool timerlist_expired(QEMUTimerList *timer_list)
 {
     int64_t expire_time;
 
+    if (!atomic_read(&timer_list->active_timers)) {
+        return false;
+    }
+
     qemu_mutex_lock(&timer_list->active_timers_lock);
     if (!timer_list->active_timers) {
         qemu_mutex_unlock(&timer_list->active_timers_lock);
@@ -214,6 +218,10 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
     int64_t delta;
     int64_t expire_time;
 
+    if (!atomic_read(&timer_list->active_timers)) {
+        return -1;
+    }
+
     if (!timer_list->clock->enabled) {
         return -1;
     }
@@ -363,7 +371,7 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
         if (!t)
             break;
         if (t == ts) {
-            *pt = t->next;
+            atomic_set(pt, t->next);
             break;
         }
         pt = &t->next;
@@ -386,7 +394,7 @@ static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
     }
     ts->expire_time = MAX(expire_time, 0);
     ts->next = *pt;
-    *pt = ts;
+    atomic_set(pt, ts);
 
     return pt == &timer_list->active_timers;
 }
@@ -481,8 +489,12 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
     QEMUTimerCB *cb;
     void *opaque;
 
+    if (!atomic_read(&timer_list->active_timers)) {
+        return false;
+    }
+
     qemu_event_reset(&timer_list->timers_done_ev);
-    if (!timer_list->clock->enabled || !timer_list->active_timers) {
+    if (!timer_list->clock->enabled) {
         goto out;
     }
 

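The qemu-timer change pairs atomic_set() on the list head (in timer_del_locked and timer_mod_ns_locked) with a lock-free atomic_read() early-out, so idle timer lists no longer pay for the mutex or the event. Distilled to its essentials (a sketch, not the actual QEMUTimerList layout):

    typedef struct Node Node;

    typedef struct List {
        QemuMutex lock;
        Node *head;                /* only ever written via atomic_set() */
    } List;

    static bool list_has_work(List *l)
    {
        /* Fast path, no lock: a racing reader merely sees a slightly
         * stale head pointer, which callers already tolerate. */
        if (!atomic_read(&l->head)) {
            return false;
        }

        /* Slow path: re-check under the lock before acting. */
        qemu_mutex_lock(&l->lock);
        bool has_work = l->head != NULL;
        qemu_mutex_unlock(&l->lock);
        return has_work;
    }
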
+ 14 - 12
rules.mak

@@ -7,6 +7,10 @@ MAKEFLAGS += -rR
 
 # Files with this suffixes are final, don't try to generate them
 # using implicit rules
+%/trace-events:
+%.hx:
+%.py:
+%.objs:
 %.d:
 %.h:
 %.c:
@@ -192,15 +196,15 @@ clean: clean-timestamp
 # save-vars
 # Usage: $(call save-vars, vars)
 # Save each variable $v in $vars as save-vars-$v, save their object's
-# variables, then clear $v.
+# variables, then clear $v.  saved-vars-$v contains the variables that
+# were saved for the objects, in order to speed up load-vars.
 define save-vars
     $(foreach v,$1,
         $(eval save-vars-$v := $(value $v))
-        $(foreach o,$($v),
-            $(foreach k,cflags libs objs,
-                $(if $($o-$k),
-                    $(eval save-vars-$o-$k := $($o-$k))
-                    $(eval $o-$k := ))))
+        $(eval saved-vars-$v := $(foreach o,$($v), \
+            $(if $($o-cflags), $o-cflags $(eval save-vars-$o-cflags := $($o-cflags))$(eval $o-cflags := )) \
+            $(if $($o-libs), $o-libs $(eval save-vars-$o-libs := $($o-libs))$(eval $o-libs := )) \
+            $(if $($o-objs), $o-objs $(eval save-vars-$o-objs := $($o-objs))$(eval $o-objs := ))))
         $(eval $v := ))
 endef
 
@@ -213,12 +217,10 @@ define load-vars
     $(eval $2-new-value := $(value $2))
     $(foreach v,$1,
         $(eval $v := $(value save-vars-$v))
-        $(foreach o,$($v),
-            $(foreach k,cflags libs objs,
-                $(if $(save-vars-$o-$k),
-                    $(eval $o-$k := $(save-vars-$o-$k))
-                    $(eval save-vars-$o-$k := ))))
-        $(eval save-vars-$v := ))
+        $(foreach o,$(saved-vars-$v),
+            $(eval $o := $(save-vars-$o)) $(eval save-vars-$o := ))
+        $(eval save-vars-$v := )
+        $(eval saved-vars-$v := ))
     $(eval $2 := $(value $2) $($2-new-value))
 endef
 

+ 37 - 5
target/i386/arch_memory_mapping.c

@@ -220,7 +220,8 @@ static void walk_pdpe(MemoryMappingList *list, AddressSpace *as,
 
 /* IA-32e Paging */
 static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
-                       hwaddr pml4e_start_addr, int32_t a20_mask)
+                       hwaddr pml4e_start_addr, int32_t a20_mask,
+                       target_ulong start_line_addr)
 {
     hwaddr pml4e_addr, pdpe_start_addr;
     uint64_t pml4e;
@@ -236,11 +237,34 @@ static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
             continue;
         }
 
-        line_addr = ((i & 0x1ffULL) << 39) | (0xffffULL << 48);
+        line_addr = start_line_addr | ((i & 0x1ffULL) << 39);
         pdpe_start_addr = (pml4e & PLM4_ADDR_MASK) & a20_mask;
         walk_pdpe(list, as, pdpe_start_addr, a20_mask, line_addr);
     }
 }
+
+static void walk_pml5e(MemoryMappingList *list, AddressSpace *as,
+                       hwaddr pml5e_start_addr, int32_t a20_mask)
+{
+    hwaddr pml5e_addr, pml4e_start_addr;
+    uint64_t pml5e;
+    target_ulong line_addr;
+    int i;
+
+    for (i = 0; i < 512; i++) {
+        pml5e_addr = (pml5e_start_addr + i * 8) & a20_mask;
+        pml5e = address_space_ldq(as, pml5e_addr, MEMTXATTRS_UNSPECIFIED,
+                                  NULL);
+        if (!(pml5e & PG_PRESENT_MASK)) {
+            /* not present */
+            continue;
+        }
+
+        line_addr = (0x7fULL << 57) | ((i & 0x1ffULL) << 48);
+        pml4e_start_addr = (pml5e & PLM4_ADDR_MASK) & a20_mask;
+        walk_pml4e(list, as, pml4e_start_addr, a20_mask, line_addr);
+    }
+}
 #endif
 
 void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
@@ -257,10 +281,18 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list,
     if (env->cr[4] & CR4_PAE_MASK) {
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
-            hwaddr pml4e_addr;
+            if (env->cr[4] & CR4_LA57_MASK) {
+                hwaddr pml5e_addr;
+
+                pml5e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
+                walk_pml5e(list, cs->as, pml5e_addr, env->a20_mask);
+            } else {
+                hwaddr pml4e_addr;
 
-            pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
-            walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask);
+                pml4e_addr = (env->cr[3] & PLM4_ADDR_MASK) & env->a20_mask;
+                walk_pml4e(list, cs->as, pml4e_addr, env->a20_mask,
+                        0xffffULL << 48);
+            }
         } else
 #endif
         {

+ 7 - 0
target/i386/bpt_helper.c

@@ -244,6 +244,13 @@ void helper_single_step(CPUX86State *env)
     raise_exception(env, EXCP01_DB);
 }
 
+void helper_rechecking_single_step(CPUX86State *env)
+{
+    if ((env->eflags & TF_MASK) != 0) {
+        helper_single_step(env);
+    }
+}
+
 void helper_set_dr(CPUX86State *env, int reg, target_ulong t0)
 {
 #ifndef CONFIG_USER_ONLY

+ 11 - 7
target/i386/cpu.c

@@ -238,7 +238,8 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
           CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
           CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
           CPUID_7_0_EBX_RDSEED */
-#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE)
+#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | \
+          CPUID_7_0_ECX_LA57)
 #define TCG_7_0_EDX_FEATURES 0
 #define TCG_APM_FEATURES 0
 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
@@ -422,7 +423,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             "avx512f", "avx512dq", "rdseed", "adx",
             "smap", "avx512ifma", "pcommit", "clflushopt",
             "clwb", NULL, "avx512pf", "avx512er",
-            "avx512cd", NULL, "avx512bw", "avx512vl",
+            "avx512cd", "sha-ni", "avx512bw", "avx512vl",
         },
         .cpuid_eax = 7,
         .cpuid_needs_ecx = true, .cpuid_ecx = 0,
@@ -435,7 +436,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             "ospke", NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
-            NULL, NULL, NULL, NULL,
+            "la57", NULL, NULL, NULL,
             NULL, NULL, "rdpid", NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
@@ -2742,10 +2743,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
     case 0x80000008:
         /* virtual & phys address size in low 2 bytes. */
         if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
-            /* 64 bit processor, 48 bits virtual, configurable
-             * physical bits.
-             */
-            *eax = 0x00003000 + cpu->phys_bits;
+            /* 64 bit processor */
+            *eax = cpu->phys_bits; /* configurable physical bits */
+            if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57) {
+                *eax |= 0x00003900; /* 57 bits virtual */
+            } else {
+                *eax |= 0x00003000; /* 48 bits virtual */
+            }
         } else {
             *eax = cpu->phys_bits;
         }

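CPUID leaf 0x80000008 packs the physical-address width into EAX[7:0] and the linear-address width into EAX[15:8]; la57 therefore flips the second byte from 0x30 (48) to 0x39 (57). A quick decode of that layout (illustrative helper, not part of the patch):

    #include <stdint.h>

    /* Unpack the CPUID 0x80000008 EAX layout built above. */
    static void decode_addr_widths(uint32_t eax,
                                   unsigned *phys_bits, unsigned *virt_bits)
    {
        *phys_bits = eax & 0xff;         /* configurable physical bits */
        *virt_bits = (eax >> 8) & 0xff;  /* 0x30 -> 48, 0x39 -> 57 */
    }
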
+ 3 - 0
target/i386/cpu.h

@@ -224,6 +224,7 @@
 #define CR4_OSFXSR_SHIFT 9
 #define CR4_OSFXSR_MASK (1U << CR4_OSFXSR_SHIFT)
 #define CR4_OSXMMEXCPT_MASK  (1U << 10)
+#define CR4_LA57_MASK   (1U << 12)
 #define CR4_VMXE_MASK   (1U << 13)
 #define CR4_SMXE_MASK   (1U << 14)
 #define CR4_FSGSBASE_MASK (1U << 16)
@@ -621,6 +622,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */
 #define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */
 #define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */
+#define CPUID_7_0_EBX_SHA_NI   (1U << 29) /* SHA1/SHA256 Instruction Extensions */
 #define CPUID_7_0_EBX_AVX512BW (1U << 30) /* AVX-512 Byte and Word Instructions */
 #define CPUID_7_0_EBX_AVX512VL (1U << 31) /* AVX-512 Vector Length Extensions */
 
@@ -628,6 +630,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_ECX_UMIP     (1U << 2)
 #define CPUID_7_0_ECX_PKU      (1U << 3)
 #define CPUID_7_0_ECX_OSPKE    (1U << 4)
+#define CPUID_7_0_ECX_LA57     (1U << 16)
 #define CPUID_7_0_ECX_RDPID    (1U << 22)
 
 #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */

+ 39 - 13
target/i386/gdbstub.c

@@ -44,10 +44,22 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
 
+    /* N.B. GDB can't deal with changes in registers or sizes in the middle
+       of a session. So if we're in 32-bit mode on a 64-bit cpu, still act
+       as if we're on a 64-bit cpu. */
+
     if (n < CPU_NB_REGS) {
-        if (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK) {
-            return gdb_get_reg64(mem_buf, env->regs[gpr_map[n]]);
-        } else if (n < CPU_NB_REGS32) {
+        if (TARGET_LONG_BITS == 64) {
+            if (env->hflags & HF_CS64_MASK) {
+                return gdb_get_reg64(mem_buf, env->regs[gpr_map[n]]);
+            } else if (n < CPU_NB_REGS32) {
+                return gdb_get_reg64(mem_buf,
+                                     env->regs[gpr_map[n]] & 0xffffffffUL);
+            } else {
+                memset(mem_buf, 0, sizeof(target_ulong));
+                return sizeof(target_ulong);
+            }
+        } else {
             return gdb_get_reg32(mem_buf, env->regs[gpr_map32[n]]);
         }
     } else if (n >= IDX_FP_REGS && n < IDX_FP_REGS + 8) {
@@ -60,8 +72,7 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
         return 10;
     } else if (n >= IDX_XMM_REGS && n < IDX_XMM_REGS + CPU_NB_REGS) {
         n -= IDX_XMM_REGS;
-        if (n < CPU_NB_REGS32 ||
-            (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK)) {
+        if (n < CPU_NB_REGS32 || TARGET_LONG_BITS == 64) {
             stq_p(mem_buf, env->xmm_regs[n].ZMM_Q(0));
             stq_p(mem_buf + 8, env->xmm_regs[n].ZMM_Q(1));
             return 16;
@@ -69,8 +80,12 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
     } else {
         switch (n) {
         case IDX_IP_REG:
-            if (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK) {
-                return gdb_get_reg64(mem_buf, env->eip);
+            if (TARGET_LONG_BITS == 64) {
+                if (env->hflags & HF_CS64_MASK) {
+                    return gdb_get_reg64(mem_buf, env->eip);
+                } else {
+                    return gdb_get_reg64(mem_buf, env->eip & 0xffffffffUL);
+                }
             } else {
                 return gdb_get_reg32(mem_buf, env->eip);
             }
@@ -151,9 +166,17 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
     CPUX86State *env = &cpu->env;
     uint32_t tmp;
 
+    /* N.B. GDB can't deal with changes in registers or sizes in the middle
+       of a session. So if we're in 32-bit mode on a 64-bit cpu, still act
+       as if we're on a 64-bit cpu. */
+
     if (n < CPU_NB_REGS) {
-        if (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK) {
-            env->regs[gpr_map[n]] = ldtul_p(mem_buf);
+        if (TARGET_LONG_BITS == 64) {
+            if (env->hflags & HF_CS64_MASK) {
+                env->regs[gpr_map[n]] = ldtul_p(mem_buf);
+            } else if (n < CPU_NB_REGS32) {
+                env->regs[gpr_map[n]] = ldtul_p(mem_buf) & 0xffffffffUL;
+            }
             return sizeof(target_ulong);
         } else if (n < CPU_NB_REGS32) {
             n = gpr_map32[n];
@@ -169,8 +192,7 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
         return 10;
     } else if (n >= IDX_XMM_REGS && n < IDX_XMM_REGS + CPU_NB_REGS) {
         n -= IDX_XMM_REGS;
-        if (n < CPU_NB_REGS32 ||
-            (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK)) {
+        if (n < CPU_NB_REGS32 || TARGET_LONG_BITS == 64) {
             env->xmm_regs[n].ZMM_Q(0) = ldq_p(mem_buf);
             env->xmm_regs[n].ZMM_Q(1) = ldq_p(mem_buf + 8);
             return 16;
@@ -178,8 +200,12 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
     } else {
         switch (n) {
         case IDX_IP_REG:
-            if (TARGET_LONG_BITS == 64 && env->hflags & HF_CS64_MASK) {
-                env->eip = ldq_p(mem_buf);
+            if (TARGET_LONG_BITS == 64) {
+                if (env->hflags & HF_CS64_MASK) {
+                    env->eip = ldq_p(mem_buf);
+                } else {
+                    env->eip = ldq_p(mem_buf) & 0xffffffffUL;
+                }
                 return 8;
             } else {
                 env->eip &= ~0xffffffffUL;

+ 45 - 9
target/i386/helper.c

@@ -651,11 +651,11 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
     uint32_t hflags;
 
 #if defined(DEBUG_MMU)
-    printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
+    printf("CR4 update: %08x -> %08x\n", (uint32_t)env->cr[4], new_cr4);
 #endif
     if ((new_cr4 ^ env->cr[4]) &
         (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK |
-         CR4_SMEP_MASK | CR4_SMAP_MASK)) {
+         CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_LA57_MASK)) {
         tlb_flush(CPU(cpu), 1);
     }
 
@@ -757,19 +757,41 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
 
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
+            bool la57 = env->cr[4] & CR4_LA57_MASK;
+            uint64_t pml5e_addr, pml5e;
             uint64_t pml4e_addr, pml4e;
             int32_t sext;
 
             /* test virtual address sign extension */
-            sext = (int64_t)addr >> 47;
+            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
             if (sext != 0 && sext != -1) {
                 env->error_code = 0;
                 cs->exception_index = EXCP0D_GPF;
                 return 1;
             }
 
-            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
-                env->a20_mask;
+            if (la57) {
+                pml5e_addr = ((env->cr[3] & ~0xfff) +
+                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
+                pml5e = x86_ldq_phys(cs, pml5e_addr);
+                if (!(pml5e & PG_PRESENT_MASK)) {
+                    goto do_fault;
+                }
+                if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
+                    goto do_fault_rsvd;
+                }
+                if (!(pml5e & PG_ACCESSED_MASK)) {
+                    pml5e |= PG_ACCESSED_MASK;
+                    x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
+                }
+                ptep = pml5e ^ PG_NX_MASK;
+            } else {
+                pml5e = env->cr[3];
+                ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
+            }
+
+            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
+                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
             pml4e = x86_ldq_phys(cs, pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK)) {
                 goto do_fault;
@@ -781,7 +803,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
                 pml4e |= PG_ACCESSED_MASK;
                 x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
             }
-            ptep = pml4e ^ PG_NX_MASK;
+            ptep &= pml4e ^ PG_NX_MASK;
             pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
                 env->a20_mask;
             pdpe = x86_ldq_phys(cs, pdpe_addr);
@@ -1024,16 +1046,30 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
 
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
+            bool la57 = env->cr[4] & CR4_LA57_MASK;
+            uint64_t pml5e_addr, pml5e;
             uint64_t pml4e_addr, pml4e;
             int32_t sext;
 
             /* test virtual address sign extension */
-            sext = (int64_t)addr >> 47;
+            sext = la57 ? (int64_t)addr >> 56 : (int64_t)addr >> 47;
             if (sext != 0 && sext != -1) {
                 return -1;
             }
-            pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
-                env->a20_mask;
+
+            if (la57) {
+                pml5e_addr = ((env->cr[3] & ~0xfff) +
+                        (((addr >> 48) & 0x1ff) << 3)) & env->a20_mask;
+                pml5e = x86_ldq_phys(cs, pml5e_addr);
+                if (!(pml5e & PG_PRESENT_MASK)) {
+                    return -1;
+                }
+            } else {
+                pml5e = env->cr[3];
+            }
+
+            pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
+                    (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask;
             pml4e = x86_ldq_phys(cs, pml4e_addr);
             if (!(pml4e & PG_PRESENT_MASK)) {
                 return -1;

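Both hunks apply the same canonicality rule: every bit above the implemented linear-address width (48 bits, or 57 with LA57) must be a copy of the top implemented bit. As a standalone predicate (a sketch of the shared logic):

    #include <stdbool.h>
    #include <stdint.h>

    static bool x86_addr_is_canonical(uint64_t addr, bool la57)
    {
        int top_bit = la57 ? 56 : 47;          /* highest implemented bit */
        int64_t sext = (int64_t)addr >> top_bit;
        return sext == 0 || sext == -1;
    }
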
+ 1 - 0
target/i386/helper.h

@@ -79,6 +79,7 @@ DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl)
 DEF_HELPER_2(cmpxchg16b, void, env, tl)
 #endif
 DEF_HELPER_1(single_step, void, env)
+DEF_HELPER_1(rechecking_single_step, void, env)
 DEF_HELPER_1(cpuid, void, env)
 DEF_HELPER_1(rdtsc, void, env)
 DEF_HELPER_1(rdtscp, void, env)

+ 7 - 0
target/i386/kvm.c

@@ -117,6 +117,13 @@ bool kvm_has_smm(void)
     return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM);
 }
 
+bool kvm_has_adjust_clock_stable(void)
+{
+    int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
+
+    return (ret == KVM_CLOCK_TSC_STABLE);
+}
+
 bool kvm_allows_irq0_override(void)
 {
     return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();

+ 1 - 0
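KVM_CHECK_EXTENSION on KVM_CAP_ADJUST_CLOCK reports a flag mask rather than a plain boolean (and the header change above reserves the value 1), hence the equality test against KVM_CLOCK_TSC_STABLE. A caller might gate its migration behaviour on the helper like this (sketch; the flag name is hypothetical):

    if (kvm_has_adjust_clock_stable()) {
        /* Trust KVM_GET_CLOCK values across migration. */
        use_reliable_get_clock = true;
    }
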
target/i386/kvm_i386.h

@@ -17,6 +17,7 @@
 
 bool kvm_allows_irq0_override(void);
 bool kvm_has_smm(void);
+bool kvm_has_adjust_clock_stable(void);
 void kvm_synchronize_all_tsc(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
 void kvm_arch_do_init_vcpu(X86CPU *cs);

+ 180 - 54
target/i386/monitor.c

@@ -30,13 +30,18 @@
 #include "hmp.h"
 
 
-static void print_pte(Monitor *mon, hwaddr addr,
-                      hwaddr pte,
-                      hwaddr mask)
+static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr,
+                      hwaddr pte, hwaddr mask)
 {
 #ifdef TARGET_X86_64
-    if (addr & (1ULL << 47)) {
-        addr |= -1LL << 48;
+    if (env->cr[4] & CR4_LA57_MASK) {
+        if (addr & (1ULL << 56)) {
+            addr |= -1LL << 57;
+        }
+    } else {
+        if (addr & (1ULL << 47)) {
+            addr |= -1LL << 48;
+        }
     }
 #endif
     monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx
@@ -66,13 +71,13 @@ static void tlb_info_32(Monitor *mon, CPUArchState *env)
         if (pde & PG_PRESENT_MASK) {
             if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
                 /* 4M pages */
-                print_pte(mon, (l1 << 22), pde, ~((1 << 21) - 1));
+                print_pte(mon, env, (l1 << 22), pde, ~((1 << 21) - 1));
             } else {
                 for(l2 = 0; l2 < 1024; l2++) {
                     cpu_physical_memory_read((pde & ~0xfff) + l2 * 4, &pte, 4);
                     pte = le32_to_cpu(pte);
                     if (pte & PG_PRESENT_MASK) {
-                        print_pte(mon, (l1 << 22) + (l2 << 12),
+                        print_pte(mon, env, (l1 << 22) + (l2 << 12),
                                   pte & ~PG_PSE_MASK,
                                   ~0xfff);
                     }
@@ -100,7 +105,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
                 if (pde & PG_PRESENT_MASK) {
                     if (pde & PG_PSE_MASK) {
                         /* 2M pages with PAE, CR4.PSE is ignored */
-                        print_pte(mon, (l1 << 30 ) + (l2 << 21), pde,
+                        print_pte(mon, env, (l1 << 30) + (l2 << 21), pde,
                                   ~((hwaddr)(1 << 20) - 1));
                     } else {
                         pt_addr = pde & 0x3fffffffff000ULL;
@@ -108,7 +113,7 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
                             cpu_physical_memory_read(pt_addr + l3 * 8, &pte, 8);
                             pte = le64_to_cpu(pte);
                             if (pte & PG_PRESENT_MASK) {
-                                print_pte(mon, (l1 << 30 ) + (l2 << 21)
+                                print_pte(mon, env, (l1 << 30) + (l2 << 21)
                                           + (l3 << 12),
                                           pte & ~PG_PSE_MASK,
                                           ~(hwaddr)0xfff);
@@ -122,61 +127,82 @@ static void tlb_info_pae32(Monitor *mon, CPUArchState *env)
 }
 
 #ifdef TARGET_X86_64
-static void tlb_info_64(Monitor *mon, CPUArchState *env)
+static void tlb_info_la48(Monitor *mon, CPUArchState *env,
+        uint64_t l0, uint64_t pml4_addr)
 {
     uint64_t l1, l2, l3, l4;
     uint64_t pml4e, pdpe, pde, pte;
-    uint64_t pml4_addr, pdp_addr, pd_addr, pt_addr;
+    uint64_t pdp_addr, pd_addr, pt_addr;
 
-    pml4_addr = env->cr[3] & 0x3fffffffff000ULL;
     for (l1 = 0; l1 < 512; l1++) {
         cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
         pml4e = le64_to_cpu(pml4e);
-        if (pml4e & PG_PRESENT_MASK) {
-            pdp_addr = pml4e & 0x3fffffffff000ULL;
-            for (l2 = 0; l2 < 512; l2++) {
-                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
-                pdpe = le64_to_cpu(pdpe);
-                if (pdpe & PG_PRESENT_MASK) {
-                    if (pdpe & PG_PSE_MASK) {
-                        /* 1G pages, CR4.PSE is ignored */
-                        print_pte(mon, (l1 << 39) + (l2 << 30), pdpe,
-                                  0x3ffffc0000000ULL);
-                    } else {
-                        pd_addr = pdpe & 0x3fffffffff000ULL;
-                        for (l3 = 0; l3 < 512; l3++) {
-                            cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
-                            pde = le64_to_cpu(pde);
-                            if (pde & PG_PRESENT_MASK) {
-                                if (pde & PG_PSE_MASK) {
-                                    /* 2M pages, CR4.PSE is ignored */
-                                    print_pte(mon, (l1 << 39) + (l2 << 30) +
-                                              (l3 << 21), pde,
-                                              0x3ffffffe00000ULL);
-                                } else {
-                                    pt_addr = pde & 0x3fffffffff000ULL;
-                                    for (l4 = 0; l4 < 512; l4++) {
-                                        cpu_physical_memory_read(pt_addr
-                                                                 + l4 * 8,
-                                                                 &pte, 8);
-                                        pte = le64_to_cpu(pte);
-                                        if (pte & PG_PRESENT_MASK) {
-                                            print_pte(mon, (l1 << 39) +
-                                                      (l2 << 30) +
-                                                      (l3 << 21) + (l4 << 12),
-                                                      pte & ~PG_PSE_MASK,
-                                                      0x3fffffffff000ULL);
-                                        }
-                                    }
-                                }
-                            }
-                        }
+        if (!(pml4e & PG_PRESENT_MASK)) {
+            continue;
+        }
+
+        pdp_addr = pml4e & 0x3fffffffff000ULL;
+        for (l2 = 0; l2 < 512; l2++) {
+            cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
+            pdpe = le64_to_cpu(pdpe);
+            if (!(pdpe & PG_PRESENT_MASK)) {
+                continue;
+            }
+
+            if (pdpe & PG_PSE_MASK) {
+                /* 1G pages, CR4.PSE is ignored */
+                print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30),
+                        pdpe, 0x3ffffc0000000ULL);
+                continue;
+            }
+
+            pd_addr = pdpe & 0x3fffffffff000ULL;
+            for (l3 = 0; l3 < 512; l3++) {
+                cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
+                pde = le64_to_cpu(pde);
+                if (!(pde & PG_PRESENT_MASK)) {
+                    continue;
+                }
+
+                if (pde & PG_PSE_MASK) {
+                    /* 2M pages, CR4.PSE is ignored */
+                    print_pte(mon, env, (l0 << 48) + (l1 << 39) + (l2 << 30) +
+                            (l3 << 21), pde, 0x3ffffffe00000ULL);
+                    continue;
+                }
+
+                pt_addr = pde & 0x3fffffffff000ULL;
+                for (l4 = 0; l4 < 512; l4++) {
+                    cpu_physical_memory_read(pt_addr + l4 * 8, &pte, 8);
+                    pte = le64_to_cpu(pte);
+                    if (pte & PG_PRESENT_MASK) {
+                        print_pte(mon, env, (l0 << 48) + (l1 << 39) +
+                                (l2 << 30) + (l3 << 21) + (l4 << 12),
+                                pte & ~PG_PSE_MASK, 0x3fffffffff000ULL);
                     }
                 }
             }
         }
     }
 }
+
+static void tlb_info_la57(Monitor *mon, CPUArchState *env)
+{
+    uint64_t l0;
+    uint64_t pml5e;
+    uint64_t pml5_addr;
+
+    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
+    for (l0 = 0; l0 < 512; l0++) {
+        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
+        pml5e = le64_to_cpu(pml5e);
+        if (pml5e & PG_PRESENT_MASK) {
+            tlb_info_la48(mon, env, l0, pml5e & 0x3fffffffff000ULL);
+        }
+    }
+}
 #endif /* TARGET_X86_64 */
 
 void hmp_info_tlb(Monitor *mon, const QDict *qdict)
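
The walk above follows the hardware translation scheme: with CR4.LA57 set, a 57-bit linear address decomposes into five 9-bit table indexes (PML5, PML4, PDPT, PD, PT) plus a 12-bit page offset, which is why tlb_info_la48() gains an l0 parameter and folds (l0 << 48) into every address it prints. A minimal standalone sketch of the decomposition, with the helper name and output format invented for illustration:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: split a 57-bit linear address into the five 9-bit
 * table indexes and the 12-bit page offset used by the walk above. */
static void la57_split(uint64_t va)
{
    unsigned l0  = (va >> 48) & 0x1ff;  /* PML5 index */
    unsigned l1  = (va >> 39) & 0x1ff;  /* PML4 index */
    unsigned l2  = (va >> 30) & 0x1ff;  /* PDPT index */
    unsigned l3  = (va >> 21) & 0x1ff;  /* PD index   */
    unsigned l4  = (va >> 12) & 0x1ff;  /* PT index   */
    unsigned off = va & 0xfff;          /* page offset */

    printf("%u/%u/%u/%u/%u + 0x%03x\n", l0, l1, l2, l3, l4, off);
}

Reversing the split, (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21) + (l4 << 12), is exactly the expression print_pte() receives at each level.
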
@@ -192,7 +218,11 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict)
     if (env->cr[4] & CR4_PAE_MASK) {
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
-            tlb_info_64(mon, env);
+            if (env->cr[4] & CR4_LA57_MASK) {
+                tlb_info_la57(mon, env);
+            } else {
+                tlb_info_la48(mon, env, 0, env->cr[3] & 0x3fffffffff000ULL);
+            }
         } else
 #endif
         {
@@ -324,7 +354,7 @@ static void mem_info_pae32(Monitor *mon, CPUArchState *env)
 
 
 #ifdef TARGET_X86_64
-static void mem_info_64(Monitor *mon, CPUArchState *env)
+static void mem_info_la48(Monitor *mon, CPUArchState *env)
 {
     int prot, last_prot;
     uint64_t l1, l2, l3, l4;
@@ -400,6 +430,98 @@ static void mem_info_64(Monitor *mon, CPUArchState *env)
     /* Flush last range */
     mem_print(mon, &start, &last_prot, (hwaddr)1 << 48, 0);
 }
+
+static void mem_info_la57(Monitor *mon, CPUArchState *env)
+{
+    int prot, last_prot;
+    uint64_t l0, l1, l2, l3, l4;
+    uint64_t pml5e, pml4e, pdpe, pde, pte;
+    uint64_t pml5_addr, pml4_addr, pdp_addr, pd_addr, pt_addr, start, end;
+
+    pml5_addr = env->cr[3] & 0x3fffffffff000ULL;
+    last_prot = 0;
+    start = -1;
+    for (l0 = 0; l0 < 512; l0++) {
+        cpu_physical_memory_read(pml5_addr + l0 * 8, &pml5e, 8);
+        pml5e = le64_to_cpu(pml5e);
+        end = l0 << 48;
+        if (!(pml5e & PG_PRESENT_MASK)) {
+            prot = 0;
+            mem_print(mon, &start, &last_prot, end, prot);
+            continue;
+        }
+
+        pml4_addr = pml5e & 0x3fffffffff000ULL;
+        for (l1 = 0; l1 < 512; l1++) {
+            cpu_physical_memory_read(pml4_addr + l1 * 8, &pml4e, 8);
+            pml4e = le64_to_cpu(pml4e);
+            end = (l0 << 48) + (l1 << 39);
+            if (!(pml4e & PG_PRESENT_MASK)) {
+                prot = 0;
+                mem_print(mon, &start, &last_prot, end, prot);
+                continue;
+            }
+
+            pdp_addr = pml4e & 0x3fffffffff000ULL;
+            for (l2 = 0; l2 < 512; l2++) {
+                cpu_physical_memory_read(pdp_addr + l2 * 8, &pdpe, 8);
+                pdpe = le64_to_cpu(pdpe);
+                end = (l0 << 48) + (l1 << 39) + (l2 << 30);
+                if (!(pdpe & PG_PRESENT_MASK)) {
+                    prot = 0;
+                    mem_print(mon, &start, &last_prot, end, prot);
+                    continue;
+                }
+
+                if (pdpe & PG_PSE_MASK) {
+                    prot = pdpe & (PG_USER_MASK | PG_RW_MASK |
+                            PG_PRESENT_MASK);
+                    prot &= pml5e & pml4e;
+                    mem_print(mon, &start, &last_prot, end, prot);
+                    continue;
+                }
+
+                pd_addr = pdpe & 0x3fffffffff000ULL;
+                for (l3 = 0; l3 < 512; l3++) {
+                    cpu_physical_memory_read(pd_addr + l3 * 8, &pde, 8);
+                    pde = le64_to_cpu(pde);
+                    end = (l0 << 48) + (l1 << 39) + (l2 << 30) + (l3 << 21);
+                    if (!(pde & PG_PRESENT_MASK)) {
+                        prot = 0;
+                        mem_print(mon, &start, &last_prot, end, prot);
+                        continue;
+                    }
+
+                    if (pde & PG_PSE_MASK) {
+                        prot = pde & (PG_USER_MASK | PG_RW_MASK |
+                                PG_PRESENT_MASK);
+                        prot &= pml5e & pml4e & pdpe;
+                        mem_print(mon, &start, &last_prot, end, prot);
+                        continue;
+                    }
+
+                    pt_addr = pde & 0x3fffffffff000ULL;
+                    for (l4 = 0; l4 < 512; l4++) {
+                        cpu_physical_memory_read(pt_addr + l4 * 8, &pte, 8);
+                        pte = le64_to_cpu(pte);
+                        end = (l0 << 48) + (l1 << 39) + (l2 << 30) +
+                            (l3 << 21) + (l4 << 12);
+                        if (pte & PG_PRESENT_MASK) {
+                            prot = pte & (PG_USER_MASK | PG_RW_MASK |
+                                    PG_PRESENT_MASK);
+                            prot &= pml5e & pml4e & pdpe & pde;
+                        } else {
+                            prot = 0;
+                        }
+                        mem_print(mon, &start, &last_prot, end, prot);
+                    }
+                }
+            }
+        }
+    }
+    /* Flush last range */
+    mem_print(mon, &start, &last_prot, (hwaddr)1 << 57, 0);
+}
 #endif /* TARGET_X86_64 */
 
 void hmp_info_mem(Monitor *mon, const QDict *qdict)
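
mem_info_la57() computes the effective protection of each range the way the MMU does: the user/read-write/present bits of every entry on the walk are ANDed together, so a read-only or supervisor-only entry at any upper level restricts the entire subtree beneath it. A self-contained sketch of that rule for a 4K page under five levels (the function name is illustrative; the mask values mirror the x86 PG_*_MASK bit positions used above):

#include <stdint.h>

#define PG_PRESENT_MASK (1ULL << 0)
#define PG_RW_MASK      (1ULL << 1)
#define PG_USER_MASK    (1ULL << 2)

/* Illustrative only: the effective U/W/P protection of a 4K page is the
 * intersection of those bits across all five levels of the walk. */
static uint64_t effective_prot(uint64_t pml5e, uint64_t pml4e, uint64_t pdpe,
                               uint64_t pde, uint64_t pte)
{
    uint64_t mask = PG_USER_MASK | PG_RW_MASK | PG_PRESENT_MASK;
    return pml5e & pml4e & pdpe & pde & pte & mask;
}

This matches the prot computation in the innermost loop above, where the pte bits are masked first and then ANDed with every entry on the path.
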
@@ -415,7 +537,11 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict)
     if (env->cr[4] & CR4_PAE_MASK) {
 #ifdef TARGET_X86_64
         if (env->hflags & HF_LMA_MASK) {
-            mem_info_64(mon, env);
+            if (env->cr[4] & CR4_LA57_MASK) {
+                mem_info_la57(mon, env);
+            } else {
+                mem_info_la48(mon, env);
+            }
         } else
 #endif
         {

+ 24 - 5
target/i386/translate.c

@@ -2500,8 +2500,10 @@ static void gen_bnd_jmp(DisasContext *s)
 }
 
 /* Generate an end of block. Trace exception is also generated if needed.
-   If IIM, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
-static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
+   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
+   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
+   S->TF.  This is used by the syscall/sysret insns.  */
+static void gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
 {
     gen_update_cc_op(s);
 
@@ -2517,6 +2519,9 @@ static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
     }
     if (s->singlestep_enabled) {
         gen_helper_debug(cpu_env);
+    } else if (recheck_tf) {
+        gen_helper_rechecking_single_step(cpu_env);
+        tcg_gen_exit_tb(0);
     } else if (s->tf) {
         gen_helper_single_step(cpu_env);
     } else {
@@ -2525,10 +2530,17 @@ static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
     s->is_jmp = DISAS_TB_JUMP;
 }
 
+/* End of block.
+   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
+static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
+{
+    gen_eob_worker(s, inhibit, false);
+}
+
 /* End of block, resetting the inhibit irq flag.  */
 static void gen_eob(DisasContext *s)
 {
-    gen_eob_inhibit_irq(s, false);
+    gen_eob_worker(s, false, false);
 }
 
 /* generate a jump to eip. No segment change must happen before as a
@@ -6423,7 +6435,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                                       tcg_const_i32(s->pc - s->cs_base));
             set_cc_op(s, CC_OP_EFLAGS);
         }
-        gen_eob(s);
+        /* TF handling for the syscall insn is different. The TF bit is checked
+           after the syscall insn completes. This allows #DB to not be
+           generated after one has entered CPL0 if TF is set in FMASK.  */
+        gen_eob_worker(s, false, true);
         break;
     case 0xe8: /* call im */
         {
@@ -7115,7 +7130,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (s->lma) {
                 set_cc_op(s, CC_OP_EFLAGS);
             }
-            gen_eob(s);
+            /* TF handling for the sysret insn is different. The TF bit is
+               checked after the sysret insn completes. This allows #DB to be
+               generated "as if" the syscall insn in userspace has just
+               completed.  */
+            gen_eob_worker(s, false, true);
         }
         break;
 #endif
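
The recheck_tf path defers the trap-flag decision from translation time to run time: instead of testing s->tf (the TF value baked into the translation block), the generated code calls a helper that re-reads EFLAGS.TF after the instruction has fully completed, since syscall can clear TF via FMASK and sysret restores the caller's TF from R11. The companion helper behind gen_helper_rechecking_single_step() boils down to something like the following sketch (its exact location in the tree may differ):

/* Sketch of the runtime recheck: raise #DB only if TF is still set in the
 * *current* EFLAGS, i.e. after syscall/sysret has applied FMASK or R11. */
void helper_rechecking_single_step(CPUX86State *env)
{
    if ((env->eflags & TF_MASK) != 0) {
        helper_single_step(env);   /* raises the #DB debug exception */
    }
}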