浏览代码

Revert "build: update QEMU to v8.0.0-utm"

This reverts commit 036f58747eb9091da59b4d8322e1a11d52ad158b.
osy 2 年之前
父节点
当前提交
cbc26a2294

文件差异内容过多而无法显示
+ 70 - 104
Configuration/QEMUConstantGenerated.swift


+ 0 - 0
patches/data/qemu-8.0.2-utm/pc-bios/edk2-aarch64-code.fd.bz2 → patches/data/qemu-7.2.0-utm/pc-bios/edk2-aarch64-code.fd.bz2


+ 0 - 0
patches/data/qemu-8.0.2-utm/pc-bios/edk2-arm-vars.fd.bz2 → patches/data/qemu-7.2.0-utm/pc-bios/edk2-arm-vars.fd.bz2


+ 0 - 0
patches/data/qemu-8.0.2-utm/pc-bios/edk2-i386-code.fd.bz2 → patches/data/qemu-7.2.0-utm/pc-bios/edk2-i386-code.fd.bz2


+ 0 - 0
patches/data/qemu-8.0.2-utm/pc-bios/edk2-i386-vars.fd.bz2 → patches/data/qemu-7.2.0-utm/pc-bios/edk2-i386-vars.fd.bz2


+ 0 - 0
patches/data/qemu-8.0.2-utm/pc-bios/edk2-x86_64-code.fd.bz2 → patches/data/qemu-7.2.0-utm/pc-bios/edk2-x86_64-code.fd.bz2


+ 0 - 0
patches/data/qemu-8.0.2-utm/pc-bios/openbios-ppc → patches/data/qemu-7.2.0-utm/pc-bios/openbios-ppc


+ 4569 - 0
patches/qemu-7.2.0-utm.patch

@@ -0,0 +1,4569 @@
+From bb5c41eb13130dada2f3cd766da9a537ef466a4b Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Tue, 20 Dec 2022 15:56:07 -0800
+Subject: [PATCH 1/3] spice-display: fix memory leak issues
+
+1) Some of the error cases did not free GL memory.
+2) Remove some unneeded logic and simplify the code.
+3) Add an `eglMakeCurrent` to `spice_iosurface_destroy` to make sure we are
+   freeing objects in the right context.
+---
+ ui/egl-helpers.c   |  1 +
+ ui/spice-display.c | 29 ++++++-----------------------
+ 2 files changed, 7 insertions(+), 23 deletions(-)
+
+diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
+index 0df9dd8fd5..a636e5f2f2 100644
+--- a/ui/egl-helpers.c
++++ b/ui/egl-helpers.c
+@@ -389,6 +389,7 @@ EGLSurface qemu_egl_init_buffer_surface(EGLContext ectx,
+     b = eglMakeCurrent(qemu_egl_display, esurface, esurface, ectx);
+     if (b == EGL_FALSE) {
+         error_report("egl: eglMakeCurrent failed");
++        qemu_egl_destroy_surface(esurface);
+         return NULL;
+     }
+ 
+diff --git a/ui/spice-display.c b/ui/spice-display.c
+index 4e4791484c..c1d2a66fc5 100644
+--- a/ui/spice-display.c
++++ b/ui/spice-display.c
+@@ -847,7 +847,7 @@ static int spice_iosurface_create(SimpleSpiceDisplay *ssd, int width, int height
+                            EGL_BIND_TO_TEXTURE_TARGET_ANGLE,
+                            &target) != EGL_TRUE) {
+         error_report("spice_iosurface_create: eglGetConfigAttrib failed");
+-        return 0;
++        goto gl_error;
+     }
+     if (target == EGL_TEXTURE_2D) {
+         tex_target = GL_TEXTURE_2D;
+@@ -855,7 +855,7 @@ static int spice_iosurface_create(SimpleSpiceDisplay *ssd, int width, int height
+         tex_target = GL_TEXTURE_RECTANGLE_ANGLE;
+     } else {
+         error_report("spice_iosurface_create: unsupported texture target");
+-        return 0;
++        goto gl_error;
+     }
+ 
+     const EGLint attribs[] = {
+@@ -880,6 +880,8 @@ static int spice_iosurface_create(SimpleSpiceDisplay *ssd, int width, int height
+ 
+     egl_fb_setup_new_tex_target(&ssd->iosurface_fb, width, height, tex_target);
+ 
++    eglBindTexImage(qemu_egl_display, ssd->esurface, EGL_BACK_BUFFER);
++
+     return 1;
+ gl_error:
+     CFRelease(ssd->iosurface);
+@@ -897,6 +899,8 @@ static void spice_iosurface_destroy(SimpleSpiceDisplay *ssd)
+         return;
+     }
+ #if defined(CONFIG_ANGLE)
++    eglMakeCurrent(qemu_egl_display, ssd->esurface, ssd->esurface, spice_gl_ctx);
++    eglReleaseTexImage(qemu_egl_display, ssd->esurface, EGL_BACK_BUFFER);
+     egl_fb_destroy(&ssd->iosurface_fb);
+     qemu_egl_destroy_surface(ssd->esurface);
+     ssd->esurface = EGL_NO_SURFACE;
+@@ -963,23 +967,10 @@ static void spice_iosurface_blit(SimpleSpiceDisplay *ssd, GLuint src_texture, bo
+ #if defined(CONFIG_ANGLE)
+     eglMakeCurrent(qemu_egl_display, ssd->esurface, ssd->esurface, spice_gl_ctx);
+     glBindTexture(ssd->iosurface_fb.texture_target, ssd->iosurface_fb.texture);
+-    eglBindTexImage(qemu_egl_display, ssd->esurface, EGL_BACK_BUFFER);
+     egl_texture_blit(ssd->gls, &ssd->iosurface_fb, &tmp_fb, flip, swap);
+ #endif
+ }
+ 
+-static void spice_iosurface_flush(SimpleSpiceDisplay *ssd)
+-{
+-    if (!ssd->iosurface) {
+-        return;
+-    }
+-
+-#if defined(CONFIG_ANGLE)
+-    eglMakeCurrent(qemu_egl_display, ssd->esurface, ssd->esurface, spice_gl_ctx);
+-    eglReleaseTexImage(qemu_egl_display, ssd->esurface, EGL_BACK_BUFFER);
+-#endif
+-}
+-
+ #endif
+ 
+ static void qemu_spice_gl_monitor_config(SimpleSpiceDisplay *ssd,
+@@ -1043,9 +1034,6 @@ static void spice_gl_refresh(DisplayChangeListener *dcl)
+     graphic_hw_update(dcl->con);
+     if (ssd->gl_updates && ssd->have_surface) {
+         qemu_spice_gl_block(ssd, true);
+-#if defined(CONFIG_IOSURFACE)
+-        spice_iosurface_flush(ssd);
+-#endif
+         glFlush();
+         cookie = (uintptr_t)qxl_cookie_new(QXL_COOKIE_TYPE_GL_DRAW_DONE, 0);
+         spice_qxl_gl_draw_async(&ssd->qxl, 0, 0,
+@@ -1079,10 +1067,6 @@ static void spice_gl_switch(DisplayChangeListener *dcl,
+     int width = 0, height = 0;
+ 
+     if (ssd->ds) {
+-#if defined(CONFIG_IOSURFACE)
+-        // need to release texture from surface before destorying it
+-        spice_iosurface_flush(ssd);
+-#endif
+         surface_gl_destroy_texture(ssd->gls, ssd->ds);
+     }
+     ssd->ds = new_surface;
+@@ -1346,7 +1330,6 @@ static void qemu_spice_gl_update(DisplayChangeListener *dcl,
+     GLuint tex_id = ssd->backing_borrow(ssd->backing_id, &y_0_top,
+                                         NULL, NULL);
+     spice_iosurface_blit(ssd, tex_id, !y_0_top, false);
+-    spice_iosurface_flush(ssd);
+     //TODO: cursor stuff
+ #endif
+ 
+-- 
+2.28.0
+
+From 34b035535eee0f8497a1492ae1d9478dc9c7e7a0 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Sat, 24 Dec 2022 17:08:52 -0800
+Subject: [PATCH 2/3] spice-display: remove redundant glBindTexture
+
+---
+ ui/spice-display.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/ui/spice-display.c b/ui/spice-display.c
+index c1d2a66fc5..610edaa089 100644
+--- a/ui/spice-display.c
++++ b/ui/spice-display.c
+@@ -966,7 +966,6 @@ static void spice_iosurface_blit(SimpleSpiceDisplay *ssd, GLuint src_texture, bo
+ 
+ #if defined(CONFIG_ANGLE)
+     eglMakeCurrent(qemu_egl_display, ssd->esurface, ssd->esurface, spice_gl_ctx);
+-    glBindTexture(ssd->iosurface_fb.texture_target, ssd->iosurface_fb.texture);
+     egl_texture_blit(ssd->gls, &ssd->iosurface_fb, &tmp_fb, flip, swap);
+ #endif
+ }
+-- 
+2.28.0
+
+From bbc1efd4ca66d0892f2bced95fc6150192585a12 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Sun, 25 Dec 2022 00:46:42 -0800
+Subject: [PATCH 3/3] Revert "virtio-blk: use BDRV_REQ_REGISTERED_BUF
+ optimization hint"
+
+This reverts commit baf422684d73c7bf38e2c18815e18d44fcf395b6.
+---
+ hw/block/virtio-blk.c          | 39 ++++++++++++----------------------
+ include/hw/virtio/virtio-blk.h |  2 --
+ 2 files changed, 14 insertions(+), 27 deletions(-)
+
+diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
+index f717550fdc..8131ec2dbc 100644
+--- a/hw/block/virtio-blk.c
++++ b/hw/block/virtio-blk.c
+@@ -21,7 +21,6 @@
+ #include "hw/block/block.h"
+ #include "hw/qdev-properties.h"
+ #include "sysemu/blockdev.h"
+-#include "sysemu/block-ram-registrar.h"
+ #include "sysemu/sysemu.h"
+ #include "sysemu/runstate.h"
+ #include "hw/virtio/virtio-blk.h"
+@@ -363,14 +362,12 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+     }
+ }
+ 
+-static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
++static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
+                                    int start, int num_reqs, int niov)
+ {
+-    BlockBackend *blk = s->blk;
+     QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
+     int64_t sector_num = mrb->reqs[start]->sector_num;
+     bool is_write = mrb->is_write;
+-    BdrvRequestFlags flags = 0;
+ 
+     if (num_reqs > 1) {
+         int i;
+@@ -401,18 +398,12 @@ static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
+                               num_reqs - 1);
+     }
+ 
+-    if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
+-        flags |= BDRV_REQ_REGISTERED_BUF;
+-    }
+-
+     if (is_write) {
+-        blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
+-                        flags, virtio_blk_rw_complete,
+-                        mrb->reqs[start]);
++        blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
++                        virtio_blk_rw_complete, mrb->reqs[start]);
+     } else {
+-        blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
+-                       flags, virtio_blk_rw_complete,
+-                       mrb->reqs[start]);
++        blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
++                       virtio_blk_rw_complete, mrb->reqs[start]);
+     }
+ }
+ 
+@@ -434,14 +425,14 @@ static int multireq_compare(const void *a, const void *b)
+     }
+ }
+ 
+-static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
++static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
+ {
+     int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
+     uint32_t max_transfer;
+     int64_t sector_num = 0;
+ 
+     if (mrb->num_reqs == 1) {
+-        submit_requests(s, mrb, 0, 1, -1);
++        submit_requests(blk, mrb, 0, 1, -1);
+         mrb->num_reqs = 0;
+         return;
+     }
+@@ -461,11 +452,11 @@ static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
+              * 3. merge would exceed maximum transfer length of backend device
+              */
+             if (sector_num + nb_sectors != req->sector_num ||
+-                niov > blk_get_max_iov(s->blk) - req->qiov.niov ||
++                niov > blk_get_max_iov(blk) - req->qiov.niov ||
+                 req->qiov.size > max_transfer ||
+                 nb_sectors > (max_transfer -
+                               req->qiov.size) / BDRV_SECTOR_SIZE) {
+-                submit_requests(s, mrb, start, num_reqs, niov);
++                submit_requests(blk, mrb, start, num_reqs, niov);
+                 num_reqs = 0;
+             }
+         }
+@@ -481,7 +472,7 @@ static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
+         num_reqs++;
+     }
+ 
+-    submit_requests(s, mrb, start, num_reqs, niov);
++    submit_requests(blk, mrb, start, num_reqs, niov);
+     mrb->num_reqs = 0;
+ }
+ 
+@@ -496,7 +487,7 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
+      * Make sure all outstanding writes are posted to the backing device.
+      */
+     if (mrb->is_write && mrb->num_reqs > 0) {
+-        virtio_blk_submit_multireq(s, mrb);
++        virtio_blk_submit_multireq(s->blk, mrb);
+     }
+     blk_aio_flush(s->blk, virtio_blk_flush_complete, req);
+ }
+@@ -676,7 +667,7 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
+         if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
+                                   is_write != mrb->is_write ||
+                                   !s->conf.request_merging)) {
+-            virtio_blk_submit_multireq(s, mrb);
++            virtio_blk_submit_multireq(s->blk, mrb);
+         }
+ 
+         assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
+@@ -783,7 +774,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
+     } while (!virtio_queue_empty(vq));
+ 
+     if (mrb.num_reqs) {
+-        virtio_blk_submit_multireq(s, &mrb);
++        virtio_blk_submit_multireq(s->blk, &mrb);
+     }
+ 
+     blk_io_unplug(s->blk);
+@@ -832,7 +823,7 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh)
+     }
+ 
+     if (mrb.num_reqs) {
+-        virtio_blk_submit_multireq(s, &mrb);
++        virtio_blk_submit_multireq(s->blk, &mrb);
+     }
+     if (is_bh) {
+         blk_dec_in_flight(s->conf.conf.blk);
+@@ -1214,7 +1205,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
+     }
+ 
+     s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
+-    blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
+     blk_set_dev_ops(s->blk, &virtio_block_ops, s);
+ 
+     blk_iostatus_enable(s->blk);
+@@ -1240,7 +1230,6 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
+         virtio_del_queue(vdev, i);
+     }
+     qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
+-    blk_ram_registrar_destroy(&s->blk_ram_registrar);
+     qemu_del_vm_change_state_handler(s->change);
+     blockdev_mark_auto_del(s->blk);
+     virtio_cleanup(vdev);
+diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
+index 7f589b4146..d311c57cca 100644
+--- a/include/hw/virtio/virtio-blk.h
++++ b/include/hw/virtio/virtio-blk.h
+@@ -19,7 +19,6 @@
+ #include "hw/block/block.h"
+ #include "sysemu/iothread.h"
+ #include "sysemu/block-backend.h"
+-#include "sysemu/block-ram-registrar.h"
+ #include "qom/object.h"
+ 
+ #define TYPE_VIRTIO_BLK "virtio-blk-device"
+@@ -65,7 +64,6 @@ struct VirtIOBlock {
+     struct VirtIOBlockDataPlane *dataplane;
+     uint64_t host_features;
+     size_t config_size;
+-    BlockRAMRegistrar blk_ram_registrar;
+ };
+ 
+ typedef struct VirtIOBlockReq {
+-- 
+2.28.0
+
+From 531da34587b38c64787cb25b1de1c5d13f75def8 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Wed, 28 Dec 2022 16:50:49 -0800
+Subject: [PATCH] hvf: support TSO mode (private feature)
+
+Apple Silicon supports TSO mode which can be used for emulating strong
+memory ordering in the guest. This feature requires the private entitlement
+`com.apple.private.hypervisor` as well as a private function to modify
+ACTLR_EL1 not exposed by the public Hypervisor framework.
+---
+ accel/hvf/hvf-accel-ops.c     | 51 ++++++++++++++++++++++++++---------
+ include/sysemu/hvf_int.h      | 13 +++++++++
+ meson.build                   |  1 +
+ meson_options.txt             |  2 ++
+ scripts/meson-buildoptions.sh |  3 +++
+ target/arm/hvf/hvf.c          | 28 +++++++++++++++++++
+ target/i386/hvf/hvf.c         |  5 ++++
+ 7 files changed, 90 insertions(+), 13 deletions(-)
+
+diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
+index 24913ca9c4..b414e240ec 100644
+--- a/accel/hvf/hvf-accel-ops.c
++++ b/accel/hvf/hvf-accel-ops.c
+@@ -57,13 +57,10 @@
+ #include "sysemu/hvf_int.h"
+ #include "sysemu/runstate.h"
+ #include "qemu/guest-random.h"
++#include "hw/boards.h"
+ 
+ HVFState *hvf_state;
+ 
+-#ifdef __aarch64__
+-#define HV_VM_DEFAULT NULL
+-#endif
+-
+ /* Memory slots */
+ 
+ hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
+@@ -319,25 +316,44 @@ bool hvf_allowed;
+ 
+ static int hvf_accel_init(MachineState *ms)
+ {
+-    int x;
+     hv_return_t ret;
+-    HVFState *s;
++    HVFState *s = HVF_STATE(ms->accelerator);
+ 
+-    ret = hv_vm_create(HV_VM_DEFAULT);
++    ret = hvf_arch_vm_create(s);
+     assert_hvf_ok(ret);
+ 
+-    s = g_new0(HVFState, 1);
++    hvf_state = s;
++    memory_listener_register(&hvf_memory_listener, &address_space_memory);
++
++    return hvf_arch_init();
++}
++
++#if defined(CONFIG_HVF_PRIVATE) && defined(__aarch64__)
++
++static bool hvf_get_tso(Object *obj, Error **errp)
++{
++    HVFState *s = HVF_STATE(obj);
++    return s->tso_mode;
++}
++
++static void hvf_set_tso(Object *obj, bool value, Error **errp)
++{
++    HVFState *s = HVF_STATE(obj);
++    s->tso_mode = value;
++}
++
++#endif
++
++static void hvf_accel_instance_init(Object *obj)
++{
++    int x;
++    HVFState *s = HVF_STATE(obj);
+ 
+     s->num_slots = ARRAY_SIZE(s->slots);
+     for (x = 0; x < s->num_slots; ++x) {
+         s->slots[x].size = 0;
+         s->slots[x].slot_id = x;
+     }
+-
+-    hvf_state = s;
+-    memory_listener_register(&hvf_memory_listener, &address_space_memory);
+-
+-    return hvf_arch_init();
+ }
+ 
+ static void hvf_accel_class_init(ObjectClass *oc, void *data)
+@@ -346,12 +362,21 @@ static void hvf_accel_class_init(ObjectClass *oc, void *data)
+     ac->name = "HVF";
+     ac->init_machine = hvf_accel_init;
+     ac->allowed = &hvf_allowed;
++
++#if defined(CONFIG_HVF_PRIVATE) && defined(__aarch64__)
++    object_class_property_add_bool(oc, "tso",
++        hvf_get_tso, hvf_set_tso);
++    object_class_property_set_description(oc, "tso",
++        "Set on/off to enable/disable total store ordering mode");
++#endif
+ }
+ 
+ static const TypeInfo hvf_accel_type = {
+     .name = TYPE_HVF_ACCEL,
+     .parent = TYPE_ACCEL,
++    .instance_init = hvf_accel_instance_init,
+     .class_init = hvf_accel_class_init,
++    .instance_size = sizeof(HVFState),
+ };
+ 
+ static void hvf_type_init(void)
+diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
+index 6545f7cd61..9f550b9f8b 100644
+--- a/include/sysemu/hvf_int.h
++++ b/include/sysemu/hvf_int.h
+@@ -17,6 +17,15 @@
+ #include <Hypervisor/hv.h>
+ #endif
+ 
++#if defined(CONFIG_HVF_PRIVATE) && defined(__aarch64__)
++extern hv_return_t _hv_vm_config_set_isa(hv_vm_config_t config, uint32_t isa);
++extern hv_return_t _hv_vcpu_get_actlr(hv_vcpu_t vcpu, uint64_t* value);
++extern hv_return_t _hv_vcpu_set_actlr(hv_vcpu_t vcpu, uint64_t value);
++
++#define HV_VM_CONFIG_ISA_PRIVATE (3)
++#define ACTLR_EL1_TSO_ENABLE_MASK ((1 << 1) | (1 << 9))
++#endif
++
+ /* hvf_slot flags */
+ #define HVF_SLOT_LOG (1 << 0)
+ 
+@@ -45,6 +54,9 @@ struct HVFState {
+ 
+     hvf_vcpu_caps *hvf_caps;
+     uint64_t vtimer_offset;
++#if defined(CONFIG_HVF_PRIVATE) && defined(__aarch64__)
++    bool tso_mode;
++#endif
+ };
+ extern HVFState *hvf_state;
+ 
+@@ -56,6 +68,7 @@ struct hvf_vcpu_state {
+ };
+ 
+ void assert_hvf_ok(hv_return_t ret);
++hv_return_t hvf_arch_vm_create(HVFState *s);
+ int hvf_arch_init(void);
+ int hvf_arch_init_vcpu(CPUState *cpu);
+ void hvf_arch_vcpu_destroy(CPUState *cpu);
+diff --git a/meson.build b/meson.build
+index 00fccfc676..ab6a60d1a8 100644
+--- a/meson.build
++++ b/meson.build
+@@ -440,6 +440,7 @@ if get_option('hvf').allowed()
+                    required: get_option('hvf'))
+   if hvf.found()
+     accelerators += 'CONFIG_HVF'
++    config_host_data.set('CONFIG_HVF_PRIVATE', get_option('hvf_private'))
+   endif
+ endif
+ if get_option('hax').allowed()
+diff --git a/meson_options.txt b/meson_options.txt
+index 43916078c8..8415d45071 100644
+--- a/meson_options.txt
++++ b/meson_options.txt
+@@ -72,6 +72,8 @@ option('whpx', type: 'feature', value: 'auto',
+        description: 'WHPX acceleration support')
+ option('hvf', type: 'feature', value: 'auto',
+        description: 'HVF acceleration support')
++option('hvf_private', type: 'boolean', value: 'false',
++       description: 'HVF private features (entitlements required)')
+ option('nvmm', type: 'feature', value: 'auto',
+        description: 'NVMM acceleration support')
+ option('xen', type: 'feature', value: 'auto',
+diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
+index 2496991056..010515ac98 100644
+--- a/scripts/meson-buildoptions.sh
++++ b/scripts/meson-buildoptions.sh
+@@ -26,6 +26,7 @@ meson_options_help() {
+   printf "%s\n" '  --enable-fuzzing         build fuzzing targets'
+   printf "%s\n" '  --enable-gcov            Enable coverage tracking.'
+   printf "%s\n" '  --enable-gprof           QEMU profiling with gprof'
++  printf "%s\n" '  --enable-hvf-private     HVF private features (entitlements required)'
+   printf "%s\n" '  --enable-lto             Use link time optimization'
+   printf "%s\n" '  --enable-malloc=CHOICE   choose memory allocator to use [system] (choices:'
+   printf "%s\n" '                           jemalloc/system/tcmalloc)'
+@@ -289,6 +290,8 @@ _meson_option_parse() {
+     --disable-hax) printf "%s" -Dhax=disabled ;;
+     --enable-hvf) printf "%s" -Dhvf=enabled ;;
+     --disable-hvf) printf "%s" -Dhvf=disabled ;;
++    --enable-hvf-private) printf "%s" -Dhvf_private=true ;;
++    --disable-hvf-private) printf "%s" -Dhvf_private=false ;;
+     --iasl=*) quote_sh "-Diasl=$2" ;;
+     --enable-iconv) printf "%s" -Diconv=enabled ;;
+     --disable-iconv) printf "%s" -Diconv=disabled ;;
+diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
+index 2c0323fe7f..bb7a4d5004 100644
+--- a/target/arm/hvf/hvf.c
++++ b/target/arm/hvf/hvf.c
+@@ -623,6 +623,18 @@ int hvf_arch_init_vcpu(CPUState *cpu)
+                               &arm_cpu->isar.id_aa64mmfr0);
+     assert_hvf_ok(ret);
+ 
++#if defined(CONFIG_HVF_PRIVATE)
++    /* enable TSO mode */
++    if (hvf_state->tso_mode) {
++        uint64_t actlr;
++        ret = _hv_vcpu_get_actlr(cpu->hvf->fd, &actlr);
++        assert_hvf_ok(ret);
++        actlr |= ACTLR_EL1_TSO_ENABLE_MASK;
++        ret = _hv_vcpu_set_actlr(cpu->hvf->fd, actlr);
++        assert_hvf_ok(ret);
++    }
++#endif
++
+     return 0;
+ }
+ 
+@@ -1343,6 +1355,22 @@ static void hvf_vm_state_change(void *opaque, bool running, RunState state)
+     }
+ }
+ 
++hv_return_t hvf_arch_vm_create(HVFState *s)
++{
++#if defined(CONFIG_HVF_PRIVATE)
++    hv_return_t ret;
++    hv_vm_config_t config = hv_vm_config_create();
++    if (s->tso_mode) {
++        _hv_vm_config_set_isa(config, HV_VM_CONFIG_ISA_PRIVATE);
++    }
++    ret = hv_vm_create(config);
++    os_release(config);
++    return ret;
++#else
++    return hv_vm_create(NULL);
++#endif
++}
++
+ int hvf_arch_init(void)
+ {
+     hvf_state->vtimer_offset = mach_absolute_time();
+diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
+index 8d2248bb3f..8283a9b761 100644
+--- a/target/i386/hvf/hvf.c
++++ b/target/i386/hvf/hvf.c
+@@ -212,6 +212,11 @@ void hvf_kick_vcpu_thread(CPUState *cpu)
+     cpus_kick_thread(cpu);
+ }
+ 
++hv_return_t hvf_arch_vm_create(HVFState *s)
++{
++    return hv_vm_create(HV_VM_DEFAULT);
++}
++
+ int hvf_arch_init(void)
+ {
+     return 0;
+
+From c874e68e5a1635326f8a2f52320b8dbe82f6be51 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Fri, 30 Dec 2022 20:24:00 -0800
+Subject: [PATCH] tcti: disable TARGET_TB_PCREL for TCTI
+
+It is currently not supported.
+---
+ target/arm/cpu-param.h  | 2 ++
+ target/i386/cpu-param.h | 2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
+index 53cac9c89b..6c4af8f0d2 100644
+--- a/target/arm/cpu-param.h
++++ b/target/arm/cpu-param.h
+@@ -31,7 +31,9 @@
+ # define TARGET_PAGE_BITS_VARY
+ # define TARGET_PAGE_BITS_MIN  10
+ 
++#ifndef CONFIG_TCG_THREADED_INTERPRETER
+ # define TARGET_TB_PCREL 1
++#endif
+ 
+ /*
+  * Cache the attrs and shareability fields from the page table entry.
+diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h
+index f579b16bd2..0975265ff3 100644
+--- a/target/i386/cpu-param.h
++++ b/target/i386/cpu-param.h
+@@ -25,7 +25,7 @@
+ #define TARGET_PAGE_BITS 12
+ #define NB_MMU_MODES 5
+ 
+-#ifndef CONFIG_USER_ONLY
++#if !defined(CONFIG_USER_ONLY) && !defined(CONFIG_TCG_THREADED_INTERPRETER)
+ # define TARGET_TB_PCREL 1
+ #endif
+ 
+-- 
+2.28.0
+
+From 80c0e3099fe82b61e7a094f9f24a3c4aa030d5f3 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Sun, 1 Jan 2023 16:51:56 -0800
+Subject: [PATCH] vmnet: stop receiving events when VM is stopped
+
+When the VM is stopped using the HMP command "stop", soon the handler will
+stop reading from the vmnet interface. This causes a flood of
+`VMNET_INTERFACE_PACKETS_AVAILABLE` events to arrive and puts the host CPU
+at 100%. We fix this by removing the event handler from vmnet when the VM
+is no longer in a running state and restore it when we return to a running
+state.
+---
+ net/vmnet-common.m | 48 +++++++++++++++++++++++++++++++++-------------
+ net/vmnet_int.h    |  2 ++
+ 2 files changed, 37 insertions(+), 13 deletions(-)
+
+diff --git a/net/vmnet-common.m b/net/vmnet-common.m
+index 2cb60b9ddd..2958283485 100644
+--- a/net/vmnet-common.m
++++ b/net/vmnet-common.m
+@@ -17,6 +17,7 @@
+ #include "clients.h"
+ #include "qemu/error-report.h"
+ #include "qapi/error.h"
++#include "sysemu/runstate.h"
+ 
+ #include <vmnet/vmnet.h>
+ #include <dispatch/dispatch.h>
+@@ -242,6 +243,35 @@ static void vmnet_bufs_init(VmnetState *s)
+     }
+ }
+ 
++/**
++ * Called on state change to un-register/re-register handlers
++ */
++static void vmnet_vm_state_change_cb(void *opaque, bool running, RunState state)
++{
++    VmnetState *s = opaque;
++
++    if (running) {
++        vmnet_interface_set_event_callback(
++            s->vmnet_if,
++            VMNET_INTERFACE_PACKETS_AVAILABLE,
++            s->if_queue,
++            ^(interface_event_t event_id, xpc_object_t event) {
++                assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
++                /*
++                 * This function is being called from a non qemu thread, so
++                 * we only schedule a BH, and do the rest of the io completion
++                 * handling from vmnet_send_bh() which runs in a qemu context.
++                 */
++                qemu_bh_schedule(s->send_bh);
++            });
++    } else {
++        vmnet_interface_set_event_callback(
++            s->vmnet_if,
++            VMNET_INTERFACE_PACKETS_AVAILABLE,
++            NULL,
++            NULL);
++    }
++}
+ 
+ int vmnet_if_create(NetClientState *nc,
+                     xpc_object_t if_desc,
+@@ -329,19 +359,9 @@ int vmnet_if_create(NetClientState *nc,
+     s->packets_send_current_pos = 0;
+     s->packets_send_end_pos = 0;
+ 
+-    vmnet_interface_set_event_callback(
+-        s->vmnet_if,
+-        VMNET_INTERFACE_PACKETS_AVAILABLE,
+-        s->if_queue,
+-        ^(interface_event_t event_id, xpc_object_t event) {
+-            assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
+-            /*
+-             * This function is being called from a non qemu thread, so
+-             * we only schedule a BH, and do the rest of the io completion
+-             * handling from vmnet_send_bh() which runs in a qemu context.
+-             */
+-            qemu_bh_schedule(s->send_bh);
+-        });
++    vmnet_vm_state_change_cb(s, 1, RUN_STATE_RUNNING);
++
++    s->change = qemu_add_vm_change_state_handler(vmnet_vm_state_change_cb, s);
+ 
+     return 0;
+ }
+@@ -356,6 +376,8 @@ void vmnet_cleanup_common(NetClientState *nc)
+         return;
+     }
+ 
++    vmnet_vm_state_change_cb(s, 0, RUN_STATE_SHUTDOWN);
++    qemu_del_vm_change_state_handler(s->change);
+     if_stopped_sem = dispatch_semaphore_create(0);
+     vmnet_stop_interface(
+         s->vmnet_if,
+diff --git a/net/vmnet_int.h b/net/vmnet_int.h
+index adf6e8c20d..ffba92108f 100644
+--- a/net/vmnet_int.h
++++ b/net/vmnet_int.h
+@@ -46,6 +46,8 @@ typedef struct VmnetState {
+     int packets_send_end_pos;
+ 
+     struct iovec iov_buf[VMNET_PACKETS_LIMIT];
++
++    VMChangeStateEntry *change;
+ } VmnetState;
+ 
+ const char *vmnet_status_map_str(vmnet_return_t status);
+-- 
+2.28.0
+
+From bd2fc471e7a0b1e8e700b0be8e5ae08fe2fd5b9b Mon Sep 17 00:00:00 2001
+From: osy <osy@turing.llc>
+Date: Mon, 6 Mar 2023 15:23:31 -0800
+Subject: [PATCH] Merge branch 'with_tcti_vectors' into utm-edition
+
+---
+ block/file-posix.c                    |   21 +
+ include/qemu/osdep.h                  |    2 +-
+ meson.build                           |   87 ++-
+ tcg/aarch64-tcti/tcg-target-con-set.h |   23 +-
+ tcg/aarch64-tcti/tcg-target-con-str.h |   11 +-
+ tcg/aarch64-tcti/tcg-target.c.inc     | 1274 ++++++++++++++++++++++++++-------
+ tcg/aarch64-tcti/tcg-target.h         |  163 +++--
+ tcg/aarch64-tcti/tcg-target.opc.h     |   14 +
+ tcg/aarch64-tcti/tcti-gadget-gen.py   |  613 +++++++++++++---
+ util/osdep.c                          |    6 +
+ 10 files changed, 1775 insertions(+), 439 deletions(-)
+
+diff --git a/block/file-posix.c b/block/file-posix.c
+index 9f6e6279d9..766bbb6cb5 100644
+--- a/block/file-posix.c
++++ b/block/file-posix.c
+@@ -280,6 +280,13 @@ static int raw_normalize_devicepath(const char **filename, Error **errp)
+ }
+ #endif
+ 
++#if defined(CONFIG_IOS)
++static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
++{
++    return -ENOTSUP; /* not supported on iOS */
++}
++#else /* CONFIG_IOS */
++
+ /*
+  * Get logical block size via ioctl. On success store it in @sector_size_p.
+  */
+@@ -313,6 +320,8 @@ static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
+     return success ? 0 : -errno;
+ }
+ 
++#endif
++
+ /**
+  * Get physical block size of @fd.
+  * On success, store it in @blk_size and return 0.
+@@ -1449,12 +1458,24 @@ static bool preadv_present = true;
+ static ssize_t
+ qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+ {
++#ifdef CONFIG_DARWIN /* preadv introduced in macOS 11 */
++    if (!__builtin_available(macOS 11, iOS 14, watchOS 7, tvOS 14, *)) {
++        preadv_present = false;
++        return -ENOSYS;
++    } else
++#endif
+     return preadv(fd, iov, nr_iov, offset);
+ }
+ 
+ static ssize_t
+ qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+ {
++#ifdef CONFIG_DARWIN /* pwritev introduced in macOS 11 */
++    if (!__builtin_available(macOS 11, iOS 14, watchOS 7, tvOS 14, *)) {
++        preadv_present = false;
++        return -ENOSYS;
++    } else
++#endif
+     return pwritev(fd, iov, nr_iov, offset);
+ }
+ 
+diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
+index 8dbf741ee4..97bc3ceac3 100644
+--- a/include/qemu/osdep.h
++++ b/include/qemu/osdep.h
+@@ -676,7 +676,7 @@ size_t qemu_get_host_physmem(void);
+ /**
+  * Platforms which do not support system() return ENOSYS
+  */
+-#ifndef HAVE_SYSTEM_FUNCTION
++#if !defined(HAVE_SYSTEM_FUNCTION) || defined(CONFIG_IOS)
+ #define system platform_does_not_support_system
+ static inline int platform_does_not_support_system(const char *command)
+ {
+diff --git a/meson.build b/meson.build
+index ab6a60d1a8..5fd46123de 100644
+--- a/meson.build
++++ b/meson.build
+@@ -294,6 +294,7 @@ add_project_arguments('-iquote', '.',
+ 
+ if host_machine.system() == 'darwin'
+   add_languages('objc', required: false, native: false)
++  add_project_link_arguments(['-fvisibility-inlines-hidden', '-Xlinker', '-no_deduplicate'], native: false, language: ['c', 'cpp', 'objc'])
+ endif
+ 
+ sparse = find_program('cgcc', required: get_option('sparse'))
+@@ -455,6 +456,8 @@ if targetos == 'netbsd'
+   endif
+ endif
+ 
++tcti_gadgets = files()
++
+ tcg_arch = host_arch
+ if get_option('tcg').allowed()
+   if host_arch == 'unknown'
+@@ -483,14 +486,77 @@ if get_option('tcg').allowed()
+ 
+     # Tell our compiler how to generate our TCTI gadgets.
+     gadget_generator = 'tcg/@0@/tcti-gadget-gen.py'.format(tcg_arch)
+-    tcti_gadgets = custom_target('tcti-gadgets.c.inc',
+-                                output: 'tcti-gadgets.c.inc',
+-                                input: gadget_generator,
+-                                command: [find_program(gadget_generator), '@OUTPUT@'],
+-                                build_by_default: true,
+-                                build_always_stale: false)
+-
+-    genh += tcti_gadgets
++    tcti_sources = [
++        'tcti_gadgets.h',
++        'tcti_misc_gadgets.c',
++        'tcti_misc_gadgets.h',
++        'tcti_setcond_gadgets.c',
++        'tcti_setcond_gadgets.h',
++        'tcti_brcond_gadgets.c',
++        'tcti_brcond_gadgets.h',
++        'tcti_mov_gadgets.c',
++        'tcti_mov_gadgets.h',
++        'tcti_load_signed_gadgets.c',
++        'tcti_load_signed_gadgets.h',
++        'tcti_load_unsigned_gadgets.c',
++        'tcti_load_unsigned_gadgets.h',
++        'tcti_store_gadgets.c',
++        'tcti_store_gadgets.h',
++        'tcti_arithmetic_gadgets.c',
++        'tcti_arithmetic_gadgets.h',
++        'tcti_logical_gadgets.c',
++        'tcti_logical_gadgets.h',
++        'tcti_extension_gadgets.c',
++        'tcti_extension_gadgets.h',
++        'tcti_bitwise_gadgets.c',
++        'tcti_bitwise_gadgets.h',
++        'tcti_byteswap_gadgets.c',
++        'tcti_byteswap_gadgets.h',
++        'tcti_qemu_ld_aligned_signed_le_gadgets.c',
++        'tcti_qemu_ld_aligned_signed_le_gadgets.h',
++        'tcti_qemu_ld_unaligned_signed_le_gadgets.c',
++        'tcti_qemu_ld_unaligned_signed_le_gadgets.h',
++        'tcti_qemu_ld_slowpath_signed_le_gadgets.c',
++        'tcti_qemu_ld_slowpath_signed_le_gadgets.h',
++        'tcti_qemu_ld_aligned_unsigned_le_gadgets.c',
++        'tcti_qemu_ld_aligned_unsigned_le_gadgets.h',
++        'tcti_qemu_ld_unaligned_unsigned_le_gadgets.c',
++        'tcti_qemu_ld_unaligned_unsigned_le_gadgets.h',
++        'tcti_qemu_ld_slowpath_unsigned_le_gadgets.c',
++        'tcti_qemu_ld_slowpath_unsigned_le_gadgets.h',
++        'tcti_qemu_ld_aligned_be_gadgets.c',
++        'tcti_qemu_ld_aligned_be_gadgets.h',
++        'tcti_qemu_ld_unaligned_be_gadgets.c',
++        'tcti_qemu_ld_unaligned_be_gadgets.h',
++        'tcti_qemu_ld_slowpath_be_gadgets.c',
++        'tcti_qemu_ld_slowpath_be_gadgets.h',
++        'tcti_qemu_st_aligned_le_gadgets.c',
++        'tcti_qemu_st_aligned_le_gadgets.h',
++        'tcti_qemu_st_unaligned_le_gadgets.c',
++        'tcti_qemu_st_unaligned_le_gadgets.h',
++        'tcti_qemu_st_slowpath_le_gadgets.c',
++        'tcti_qemu_st_slowpath_le_gadgets.h',
++        'tcti_qemu_st_aligned_be_gadgets.c',
++        'tcti_qemu_st_aligned_be_gadgets.h',
++        'tcti_qemu_st_unaligned_be_gadgets.c',
++        'tcti_qemu_st_unaligned_be_gadgets.h',
++        'tcti_qemu_st_slowpath_be_gadgets.c',
++        'tcti_qemu_st_slowpath_be_gadgets.h',
++        'tcti_simd_base_gadgets.c',
++        'tcti_simd_base_gadgets.h',
++        'tcti_simd_arithmetic_gadgets.c',
++        'tcti_simd_arithmetic_gadgets.h',
++        'tcti_simd_logical_gadgets.c',
++        'tcti_simd_logical_gadgets.h',
++        'tcti_simd_immediate_gadgets.c',
++        'tcti_simd_immediate_gadgets.h',
++    ]
++    tcti_gadgets = custom_target('tcti-gadgets.h',
++                              output: tcti_sources,
++                              input: gadget_generator,
++                              command: [find_program(gadget_generator)],
++                              build_by_default: true,
++                              build_always_stale: false)
+   elif host_arch == 'x86_64'
+     tcg_arch = 'i386'
+   elif host_arch == 'ppc64'
+@@ -3157,6 +3223,11 @@ if get_option('b_lto')
+ endif
+ common_ss.add(pagevary)
+ specific_ss.add(files('page-vary.c'))
++specific_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tcg/tci.c'))
++
++# FIXME: This is being used for now for development quickness, but these really should be
++# added to a gadget-specific shared library (tcti_ss).
++specific_ss.add(when: 'CONFIG_TCG_THREADED_INTERPRETER', if_true: tcti_gadgets)
+ 
+ subdir('backends')
+ subdir('disas')
+diff --git a/tcg/aarch64-tcti/tcg-target-con-set.h b/tcg/aarch64-tcti/tcg-target-con-set.h
+index f51b7bcb13..a0b91bb320 100644
+--- a/tcg/aarch64-tcti/tcg-target-con-set.h
++++ b/tcg/aarch64-tcti/tcg-target-con-set.h
+@@ -9,13 +9,24 @@
+  * Each operand should be a sequence of constraint letters as defined by
+  * tcg-target-con-str.h; the constraint combination is inclusive or.
+  */
++
++// Simple register functions.
++C_O0_I1(r)
+ C_O0_I2(r, r)
+ C_O0_I3(r, r, r)
+-C_O0_I4(r, r, r, r)
++//C_O0_I4(r, r, r, r)
+ C_O1_I1(r, r)
+-C_O1_I2(r, 0, r)
+ C_O1_I2(r, r, r)
+-C_O1_I4(r, r, r, r, r)
+-C_O2_I1(r, r, r)
+-C_O2_I2(r, r, r, r)
+-C_O2_I4(r, r, r, r, r, r)
++//C_O1_I4(r, r, r, r, r)
++//C_O2_I1(r, r, r)
++//C_O2_I2(r, r, r, r)
++//C_O2_I4(r, r, r, r, r, r)
++
++// Vector functions.
++C_O1_I1(w, w)
++C_O1_I1(w, r)
++C_O0_I2(w, r)
++C_O1_I1(w, wr)
++C_O1_I2(w, w, w)
++C_O1_I3(w, w, w, w)
++C_O1_I2(w, 0, w)
+\ No newline at end of file
+diff --git a/tcg/aarch64-tcti/tcg-target-con-str.h b/tcg/aarch64-tcti/tcg-target-con-str.h
+index 87c0f19e9c..94d06d3e74 100644
+--- a/tcg/aarch64-tcti/tcg-target-con-str.h
++++ b/tcg/aarch64-tcti/tcg-target-con-str.h
+@@ -8,4 +8,13 @@
+  * Define constraint letters for register sets:
+  * REGS(letter, register_mask)
+  */
+-REGS('r', MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS))
++REGS('r', TCG_MASK_GP_REGISTERS)
++REGS('w', TCG_MASK_VECTOR_REGISTERS)
++
++/*
++ * Define constraint letters for constants:
++ * CONST(letter, TCG_CT_CONST_* bit set)
++ */
++
++// Simple 64-bit immediates.
++CONST('I', 0xFFFFFFFFFFFFFFFF)
+diff --git a/tcg/aarch64-tcti/tcg-target.c.inc b/tcg/aarch64-tcti/tcg-target.c.inc
+index af4cc8d664..10d6c4ec1b 100644
+--- a/tcg/aarch64-tcti/tcg-target.c.inc
++++ b/tcg/aarch64-tcti/tcg-target.c.inc
+@@ -22,13 +22,16 @@
+  * THE SOFTWARE.
+  */
+ 
++
++// Rich disassembly is nice in theory, but it's -slow-.
++//#define TCTI_GADGET_RICH_DISASSEMBLY
++
+ #define TCTI_GADGET_IMMEDIATE_ARRAY_LEN 64
+ 
+ #include "tcg/tcg-ldst.h"
+ 
+-// Grab our gadget definitions.
+-// FIXME: use the system path instead of hardcoding this?
+-#include "tcti-gadgets.c.inc"
++// Grab our gadget headers.
++#include "tcti_gadgets.h"
+ 
+ /* Marker for missing code. */
+ #define TODO() \
+@@ -47,64 +50,15 @@
+ # define tcti_assert(cond) ((void)0)
+ #endif
+ 
+-/* Bitfield n...m (in 32 bit value). */
+-#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
+-
+-/**
+- * Macro that defines a look-up tree for named QEMU_LD gadgets.
+- */ 
+-#define LD_MEMOP_LOOKUP(variable, arg, suffix) \
+-    switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
+-        case MO_UB:   variable = gadget_qemu_ld_ub_   ## suffix; break; \
+-        case MO_SB:   variable = gadget_qemu_ld_sb_   ## suffix; break; \
+-        case MO_LEUW: variable = gadget_qemu_ld_leuw_ ## suffix; break; \
+-        case MO_LESW: variable = gadget_qemu_ld_lesw_ ## suffix; break; \
+-        case MO_LEUL: variable = gadget_qemu_ld_leul_ ## suffix; break; \
+-        case MO_LESL: variable = gadget_qemu_ld_lesl_ ## suffix; break; \
+-        case MO_LEUQ: variable = gadget_qemu_ld_leq_  ## suffix; break; \
+-        case MO_BEUW: variable = gadget_qemu_ld_beuw_ ## suffix; break; \
+-        case MO_BESW: variable = gadget_qemu_ld_besw_ ## suffix; break; \
+-        case MO_BEUL: variable = gadget_qemu_ld_beul_ ## suffix; break; \
+-        case MO_BESL: variable = gadget_qemu_ld_besl_ ## suffix; break; \
+-        case MO_BEUQ: variable = gadget_qemu_ld_beq_  ## suffix; break; \
+-        default: \
+-            g_assert_not_reached(); \
+-    }
+-#define LD_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
+-        if (a_bits >= s_bits) { \
+-            LD_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix ); \
+-        } else { \
+-            LD_MEMOP_LOOKUP(gadget, arg, unaligned_ ## suffix); \
+-        }
+-
+-
+-
+-/**
+- * Macro that defines a look-up tree for named QEMU_ST gadgets.
+- */ 
+-#define ST_MEMOP_LOOKUP(variable, arg, suffix) \
+-    switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
+-        case MO_UB:   variable = gadget_qemu_st_ub_   ## suffix; break; \
+-        case MO_LEUW: variable = gadget_qemu_st_leuw_ ## suffix; break; \
+-        case MO_LEUL: variable = gadget_qemu_st_leul_ ## suffix; break; \
+-        case MO_LEUQ: variable = gadget_qemu_st_leq_  ## suffix; break; \
+-        case MO_BEUW: variable = gadget_qemu_st_beuw_ ## suffix; break; \
+-        case MO_BEUL: variable = gadget_qemu_st_beul_ ## suffix; break; \
+-        case MO_BEUQ: variable = gadget_qemu_st_beq_  ## suffix; break; \
+-        default: \
+-            g_assert_not_reached(); \
+-    }
+-#define ST_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
+-        if (a_bits >= s_bits) { \
+-            ST_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix ); \
+-        } else { \
+-            ST_MEMOP_LOOKUP(gadget, arg, unaligned_ ## suffix); \
+-        }
+ 
++/********************************
++ *  TCG Constraints Definitions *
++ ********************************/
+ 
+ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+ {
+     switch (op) {
++
+     case INDEX_op_ld8u_i32:
+     case INDEX_op_ld8s_i32:
+     case INDEX_op_ld16u_i32:
+@@ -138,6 +92,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+     case INDEX_op_bswap32_i32:
+     case INDEX_op_bswap32_i64:
+     case INDEX_op_bswap64_i64:
++    case INDEX_op_extrl_i64_i32:
++    case INDEX_op_extrh_i64_i32:
+         return C_O1_I1(r, r);
+ 
+     case INDEX_op_st8_i32:
+@@ -191,6 +147,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+     case INDEX_op_rotr_i64:
+     case INDEX_op_setcond_i32:
+     case INDEX_op_setcond_i64:
++    case INDEX_op_clz_i32:
++    case INDEX_op_clz_i64:
++    case INDEX_op_ctz_i32:
++    case INDEX_op_ctz_i64:
+         return C_O1_I2(r, r, r);
+ 
+     case INDEX_op_brcond_i32:
+@@ -204,12 +164,65 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+     case INDEX_op_qemu_st_i64:
+         return C_O0_I3(r, r, r);
+ 
++    //
++    // Vector ops.
++    //
++    case INDEX_op_add_vec:
++    case INDEX_op_sub_vec:
++    case INDEX_op_mul_vec:
++    case INDEX_op_xor_vec:
++    case INDEX_op_ssadd_vec:
++    case INDEX_op_sssub_vec:
++    case INDEX_op_usadd_vec:
++    case INDEX_op_ussub_vec:
++    case INDEX_op_smax_vec:
++    case INDEX_op_smin_vec:
++    case INDEX_op_umax_vec:
++    case INDEX_op_umin_vec:
++    case INDEX_op_shlv_vec:
++    case INDEX_op_shrv_vec:
++    case INDEX_op_sarv_vec:
++    case INDEX_op_aa64_sshl_vec:
++        return C_O1_I2(w, w, w);
++    case INDEX_op_not_vec:
++    case INDEX_op_neg_vec:
++    case INDEX_op_abs_vec:
++    case INDEX_op_shli_vec:
++    case INDEX_op_shri_vec:
++    case INDEX_op_sari_vec:
++        return C_O1_I1(w, w);
++    case INDEX_op_ld_vec:
++    case INDEX_op_dupm_vec:
++        return C_O1_I1(w, r);
++    case INDEX_op_st_vec:
++        return C_O0_I2(w, r);
++    case INDEX_op_dup_vec:
++        return C_O1_I1(w, wr);
++    case INDEX_op_or_vec:
++    case INDEX_op_andc_vec:
++        return C_O1_I2(w, w, w);
++    case INDEX_op_and_vec:
++    case INDEX_op_orc_vec:
++        return C_O1_I2(w, w, w);
++    case INDEX_op_cmp_vec:
++        return C_O1_I2(w, w, w);
++    case INDEX_op_bitsel_vec:
++        return C_O1_I3(w, w, w, w);
++
+     default:
+         g_assert_not_reached();
+     }
+ }
+ 
+ static const int tcg_target_reg_alloc_order[] = {
++
++    // General purpose registers, in preference-of-allocation order.
++    TCG_REG_R8,
++    TCG_REG_R9,
++    TCG_REG_R10,
++    TCG_REG_R11,
++    TCG_REG_R12,
++    TCG_REG_R13,
+     TCG_REG_R0,
+     TCG_REG_R1,
+     TCG_REG_R2,
+@@ -218,16 +231,15 @@ static const int tcg_target_reg_alloc_order[] = {
+     TCG_REG_R5,
+     TCG_REG_R6,
+     TCG_REG_R7,
+-    TCG_REG_R8,
+-    TCG_REG_R9,
+-    TCG_REG_R10,
+-    TCG_REG_R11,
+-    TCG_REG_R12,
+-    TCG_REG_R13,
+-    /*
+-    TCG_REG_R14,  // AREG0
+-    TCG_REG_R15,  // SP
+-    */
++
++    // Note: we do not allocate R14 or R15, as they're used for our
++    // special-purpose values.
++
++    // We'll use the high 16 vector registers, avoiding the call-saved lower ones.
++    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
++    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
++    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
++    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+ };
+ 
+ #if MAX_OPC_PARAM_IARGS != 7
+@@ -248,7 +260,7 @@ static const int tcg_target_call_oarg_regs[] = {
+ };
+ 
+ #ifdef CONFIG_DEBUG_TCG
+-static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
++static const char *const tcg_target_reg_names[TCG_TARGET_GP_REGS] = {
+     "r00",
+     "r01",
+     "r02",
+@@ -268,6 +280,98 @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ };
+ #endif
+ 
++/*************************
++ *  TCG Emitter Helpers  *
++ *************************/
++
++/* Bitfield n...m (in 32 bit value). */
++#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
++
++/**
++ * Macro that defines a look-up tree for named QEMU_LD gadgets.
++ */
++#define LD_MEMOP_LOOKUP(variable, arg, suffix) \
++    switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
++        case MO_UB:   variable = gadget_qemu_ld_ub_   ## suffix; break; \
++        case MO_SB:   variable = gadget_qemu_ld_sb_   ## suffix; break; \
++        case MO_LEUW: variable = gadget_qemu_ld_leuw_ ## suffix; break; \
++        case MO_LESW: variable = gadget_qemu_ld_lesw_ ## suffix; break; \
++        case MO_LEUL: variable = gadget_qemu_ld_leul_ ## suffix; break; \
++        case MO_LESL: variable = gadget_qemu_ld_lesl_ ## suffix; break; \
++        case MO_LEUQ:  variable = gadget_qemu_ld_leq_  ## suffix; break; \
++        case MO_BEUW: variable = gadget_qemu_ld_beuw_ ## suffix; break; \
++        case MO_BESW: variable = gadget_qemu_ld_besw_ ## suffix; break; \
++        case MO_BEUL: variable = gadget_qemu_ld_beul_ ## suffix; break; \
++        case MO_BESL: variable = gadget_qemu_ld_besl_ ## suffix; break; \
++        case MO_BEUQ:  variable = gadget_qemu_ld_beq_  ## suffix; break; \
++        default: \
++            g_assert_not_reached(); \
++    }
++#define LD_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
++        if (a_bits >= s_bits) { \
++            LD_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix ); \
++        } else { \
++            LD_MEMOP_LOOKUP(gadget, arg, unaligned_ ## suffix); \
++        }
++
++
++
++/**
++ * Macro that defines a look-up tree for named QEMU_ST gadgets.
++ */
++#define ST_MEMOP_LOOKUP(variable, arg, suffix) \
++    switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
++        case MO_UB:   variable = gadget_qemu_st_ub_   ## suffix; break; \
++        case MO_LEUW: variable = gadget_qemu_st_leuw_ ## suffix; break; \
++        case MO_LEUL: variable = gadget_qemu_st_leul_ ## suffix; break; \
++        case MO_LEUQ:  variable = gadget_qemu_st_leq_  ## suffix; break; \
++        case MO_BEUW: variable = gadget_qemu_st_beuw_ ## suffix; break; \
++        case MO_BEUL: variable = gadget_qemu_st_beul_ ## suffix; break; \
++        case MO_BEUQ:  variable = gadget_qemu_st_beq_  ## suffix; break; \
++        default: \
++            g_assert_not_reached(); \
++    }
++#define ST_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
++        if (a_bits >= s_bits) { \
++            ST_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix ); \
++        } else { \
++            ST_MEMOP_LOOKUP(gadget, arg, unaligned_ ## suffix); \
++        }
++
++
++#define LOOKUP_SPECIAL_CASE_LDST_GADGET(arg, name, mode) \
++    switch(TLB_MASK_TABLE_OFS(get_mmuidx(arg))) { \
++        case -32:  \
++            gadget = (a_bits >= s_bits) ?  \
++                gadget_qemu_ ## name ## _aligned_ ## mode ## _off32_i64 : \
++                gadget_qemu_ ## name ## _unaligned_ ## mode ## _off32_i64; \
++            break; \
++        case -48:  \
++            gadget = (a_bits >= s_bits) ?  \
++                gadget_qemu_ ## name ## _aligned_ ## mode ## _off48_i64 : \
++                gadget_qemu_ ## name ## _unaligned_ ## mode ## _off48_i64; \
++            break; \
++        case -64: \
++            gadget = (a_bits >= s_bits) ? \
++                gadget_qemu_ ## name ## _aligned_ ## mode ## _off64_i64 : \
++                gadget_qemu_ ## name ## _unaligned_ ## mode ## _off64_i64; \
++            break; \
++        case -96: \
++            gadget = (a_bits >= s_bits) ? \
++                gadget_qemu_ ## name ## _aligned_ ## mode ## _off96_i64 : \
++                gadget_qemu_ ## name ## _unaligned_ ## mode ## _off96_i64; \
++            break; \
++        case -128: \
++            gadget = (a_bits >= s_bits) ? \
++                gadget_qemu_ ## name ## _aligned_ ## mode ## _off128_i64 : \
++                gadget_qemu_ ## name ## _unaligned_ ## mode ## _off128_i64; \
++            break;\
++        default: \
++            gadget = gadget_qemu_ ## name ## _slowpath_ ## mode ## _off0_i64; \
++            break; \
++        }
++
++
+ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+                         intptr_t value, intptr_t addend)
+ {
+@@ -363,48 +467,51 @@ tcg_target_ulong helper_be_ldul_mmu_signed(CPUArchState *env, target_ulong addr,
+ 
+ 
+ /* Write gadget pointer. */
+-static void tcg_out_nullary_gadget(TCGContext *s, void *gadget)
++static void tcg_out_gadget(TCGContext *s, const void *gadget)
+ {
+     tcg_out_immediate(s, (tcg_target_ulong)gadget);
+ }
+ 
+ /* Write gadget pointer, plus 64b immediate. */
+-static void tcg_out_imm64_gadget(TCGContext *s, void *gadget, tcg_target_ulong immediate)
++static void tcg_out_imm64_gadget(TCGContext *s, const void *gadget, tcg_target_ulong immediate)
+ {
+-    tcg_out_nullary_gadget(s, gadget);
++    tcg_out_gadget(s, gadget);
+     tcg_out64(s, immediate);
+ }
+ 
+ 
+ /* Write gadget pointer (one register). */
+-static void tcg_out_unary_gadget(TCGContext *s, void *gadget_base[TCG_TARGET_NB_REGS], unsigned reg0)
++static void tcg_out_unary_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS], unsigned reg0)
+ {
+-    tcg_out_nullary_gadget(s, gadget_base[reg0]);
++    tcg_out_gadget(s, gadget_base[reg0]);
+ }
+ 
+ 
+ /* Write gadget pointer (two registers). */
+-static void tcg_out_binary_gadget(TCGContext *s, void *gadget_base[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS], unsigned reg0, unsigned reg1)
++static void tcg_out_binary_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS], unsigned reg0, unsigned reg1)
+ {
+-    tcg_out_nullary_gadget(s, gadget_base[reg0][reg1]);
++    tcg_out_gadget(s, gadget_base[reg0][reg1]);
+ }
+ 
+ 
+ /* Write gadget pointer (three registers). */
+-static void tcg_out_ternary_gadget(TCGContext *s, void *gadget_base[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS], unsigned reg0, unsigned reg1, unsigned reg2)
++static void tcg_out_ternary_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS], unsigned reg0, unsigned reg1, unsigned reg2)
+ {
+-    tcg_out_nullary_gadget(s, gadget_base[reg0][reg1][reg2]);
++    tcg_out_gadget(s, gadget_base[reg0][reg1][reg2]);
+ }
+ 
++/***************************
++ *  TCG Scalar Operations  *
++ ***************************/
+ 
+ /**
+  * Version of our LDST generator that defers to more optimized gadgets selectively.
+  */
+-static void tcg_out_ldst_gadget_inner(TCGContext *s, 
+-    void *gadget_base[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS], 
+-    void *gadget_pos_imm[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN], 
+-    void *gadget_shifted_imm[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN], 
+-    void *gadget_neg_imm[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN], 
++static void tcg_out_ldst_gadget_inner(TCGContext *s,
++    const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS],
++    const void *gadget_pos_imm[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
++    const void *gadget_shifted_imm[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
++    const void *gadget_neg_imm[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
+     unsigned reg0, unsigned reg1, uint32_t offset)
+ {
+     int64_t extended_offset = (int32_t)offset;
+@@ -415,7 +522,7 @@ static void tcg_out_ldst_gadget_inner(TCGContext *s,
+ 
+     // We handle positive and negative gadgets separately, in order to allow for asymmetrical
+     // collections of pre-made gadgets.
+-    if (!is_negative) 
++    if (!is_negative)
+     {
+         uint64_t shifted_offset = (extended_offset >> 3);
+         bool aligned_to_8B = ((extended_offset & 0b111) == 0);
+@@ -425,23 +532,23 @@ static void tcg_out_ldst_gadget_inner(TCGContext *s,
+ 
+         // More optimal case: we have a gadget that directly encodes the argument.
+         if (have_optimized_gadget) {
+-            tcg_out_nullary_gadget(s, gadget_pos_imm[reg0][reg1][extended_offset]);
++            tcg_out_gadget(s, gadget_pos_imm[reg0][reg1][extended_offset]);
+             return;
+-        } 
++        }
+ 
+         // Special case: it's frequent to have low-numbered positive offsets that are aligned
+         // to 16B boundaries
+         else if(aligned_to_8B && have_shifted_gadget) {
+-            tcg_out_nullary_gadget(s, gadget_shifted_imm[reg0][reg1][shifted_offset]);
++            tcg_out_gadget(s, gadget_shifted_imm[reg0][reg1][shifted_offset]);
+             return;
+         }
+-    } 
++    }
+     else {
+         uint64_t negated_offset = -(extended_offset);
+ 
+         // More optimal case: we have a gadget that directly encodes the argument.
+         if (negated_offset < TCTI_GADGET_IMMEDIATE_ARRAY_LEN) {
+-            tcg_out_nullary_gadget(s, gadget_neg_imm[reg0][reg1][negated_offset]);
++            tcg_out_gadget(s, gadget_neg_imm[reg0][reg1][negated_offset]);
+             return;
+         }
+     }
+@@ -473,40 +580,90 @@ static void tcti_out_label(TCGContext *s, TCGLabel *label)
+     }
+ }
+ 
+-/**
+- * Generate a register-to-register MOV.
+- */
++
++/* Register to register move using ORR (shifted register with no shift). */
++static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
++{
++    switch(ext) {
++        case TCG_TYPE_I32:
++            tcg_out_binary_gadget(s, gadget_mov_i32, rd, rm);
++            break;
++
++        case TCG_TYPE_I64:
++            tcg_out_binary_gadget(s, gadget_mov_i64, rd, rm);
++            break;
++
++        default:
++            g_assert_not_reached();
++
++    }
++}
++
++
+ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+ {
+-    tcg_debug_assert(ret != arg);
++    TCGReg w_ret = (ret - TCG_REG_V16);
++    TCGReg w_arg = (arg - TCG_REG_V16);
+ 
+-    if (type == TCG_TYPE_I32) {
+-        tcg_out_binary_gadget(s, gadget_mov_i32, ret, arg);
+-    } else {
+-        tcg_out_binary_gadget(s, gadget_mov_i64, ret, arg);
++    if (ret == arg) {
++        return true;
+     }
+ 
++    switch (type) {
++    case TCG_TYPE_I32:
++    case TCG_TYPE_I64:
++
++        // If this is a GP to GP register mov, issue our standard MOV.
++        if (ret < 32 && arg < 32) {
++            tcg_out_movr(s, type, ret, arg);
++            break;
++        } 
++        // If this is a vector register to GP, issue a UMOV.
++        else if (ret < 32) {
++            void *gadget = (type == TCG_TYPE_I32) ? gadget_umov_s0 : gadget_umov_d0;
++            tcg_out_binary_gadget(s, gadget, ret, w_arg);
++            break;
++        } 
++        
++        // If this is a GP to vector move, insert the value using INS.
++        else if (arg < 32) {
++            void *gadget = (type == TCG_TYPE_I32) ? gadget_ins_s0 : gadget_ins_d0;
++            tcg_out_binary_gadget(s, gadget, w_ret, arg);
++            break;
++        }
++        /* FALLTHRU */
++
++    case TCG_TYPE_V64:
++        tcg_debug_assert(ret >= 32 && arg >= 32);
++        tcg_out_ternary_gadget(s, gadget_or_d, w_ret, w_arg, w_arg);
++        break;
++
++    case TCG_TYPE_V128:
++        tcg_debug_assert(ret >= 32 && arg >= 32);
++        tcg_out_ternary_gadget(s, gadget_or_q, w_ret, w_arg, w_arg);
++        break;
+ 
++    default:
++        g_assert_not_reached();
++    }
+     return true;
+ }
+ 
+ 
++
+ static void tcg_out_movi_i32(TCGContext *s, TCGReg t0, tcg_target_long arg)
+ {
+     bool is_negative = (arg < 0);
+ 
+     // We handle positive and negative gadgets separately, in order to allow for asymmetrical
+     // collections of pre-made gadgets.
+-    if (!is_negative) 
++    if (!is_negative)
+     {
+         // More optimal case: we have a gadget that directly encodes the argument.
+         if (arg < ARRAY_SIZE(gadget_movi_imm_i32[t0])) {
+-            tcg_out_nullary_gadget(s, gadget_movi_imm_i32[t0][arg]);
++            tcg_out_gadget(s, gadget_movi_imm_i32[t0][arg]);
+             return;
+         }
+-    } 
+-    else {
+-
+     }
+ 
+     // Emit the mov and its immediate.
+@@ -521,16 +678,13 @@ static void tcg_out_movi_i64(TCGContext *s, TCGReg t0, tcg_target_long arg)
+ 
+     // We handle positive and negative gadgets separately, in order to allow for asymmetrical
+     // collections of pre-made gadgets.
+-    if (!is_negative) 
++    if (!is_negative)
+     {
+         // More optimal case: we have a gadget that directly encodes the argument.
+         if (arg < ARRAY_SIZE(gadget_movi_imm_i64[t0])) {
+-            tcg_out_nullary_gadget(s, gadget_movi_imm_i64[t0][arg]);
++            tcg_out_gadget(s, gadget_movi_imm_i64[t0][arg]);
+             return;
+         }
+-    } 
+-    else {
+-
+     }
+ 
+     // TODO: optimize the negative case, too?
+@@ -558,7 +712,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg t0, tcg_target_long
+  */
+ static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
+ {
+-    tcg_out_nullary_gadget(s, gadget_call);
++    tcg_out_gadget(s, gadget_call);
+     tcg_out64(s, (uintptr_t)arg);
+ }
+ 
+@@ -570,9 +724,9 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
+ {
+ 
+     if (type == TCG_TYPE_I32) {
+-        tcg_out_ldst_gadget(s, gadget_ld32u, ret, arg1, arg2); 
++        tcg_out_ldst_gadget(s, gadget_ld32u, ret, arg1, arg2);
+     } else {
+-        tcg_out_ldst_gadget(s, gadget_ld_i64, ret, arg1, arg2); 
++        tcg_out_ldst_gadget(s, gadget_ld_i64, ret, arg1, arg2);
+     }
+ }
+ 
+@@ -598,7 +752,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         // to patch our gadget stream with the target address, later.
+         if (s->tb_jmp_insn_offset) {
+             // Emit our gadget.
+-            tcg_out_nullary_gadget(s, gadget_br);
++            tcg_out_gadget(s, gadget_br);
+ 
+             // Place our current instruction into our "relocation table", so it can
+             // be patched once we know where the branch will target...
+@@ -617,7 +771,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+     // Simple branch.
+     case INDEX_op_br:
+-        tcg_out_nullary_gadget(s, gadget_br);
++        tcg_out_gadget(s, gadget_br);
+         tcti_out_label(s, arg_label(args[0]));
+         break;
+ 
+@@ -678,41 +832,41 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+     case INDEX_op_ld8u_i32:
+     case INDEX_op_ld8u_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld8u, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_ld8u, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld8s_i32:
+-        tcg_out_ldst_gadget(s, gadget_ld8s_i32, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_ld8s_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld8s_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld8s_i64, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_ld8s_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld16u_i32:
+     case INDEX_op_ld16u_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld16u, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_ld16u, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld16s_i32:
+-        tcg_out_ldst_gadget(s, gadget_ld16s_i32, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_ld16s_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld16s_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld16s_i64, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_ld16s_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld_i32:
+     case INDEX_op_ld32u_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld32u, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_ld32u, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld_i64, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_ld_i64, args[0], args[1], args[2]);
+         break;
+-    
++   
+     case INDEX_op_ld32s_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld32s_i64, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_ld32s_i64, args[0], args[1], args[2]);
+         break;
+ 
+ 
+@@ -721,155 +875,169 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+      */
+     case INDEX_op_st8_i32:
+     case INDEX_op_st8_i64:
+-        tcg_out_ldst_gadget(s, gadget_st8, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_st8, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_st16_i32:
+     case INDEX_op_st16_i64:
+-        tcg_out_ldst_gadget(s, gadget_st16, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_st16, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_st_i32:
+     case INDEX_op_st32_i64:
+-        tcg_out_ldst_gadget(s, gadget_st_i32, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_st_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_st_i64:
+-        tcg_out_ldst_gadget(s, gadget_st_i64, args[0], args[1], args[2]); 
++        tcg_out_ldst_gadget(s, gadget_st_i64, args[0], args[1], args[2]);
+         break;
+ 
+     /**
+      * Arithmetic instructions.
+      */
+ 
+-    case INDEX_op_add_i32: 
+-        tcg_out_ternary_gadget(s, gadget_add_i32, args[0], args[1], args[2]); 
++    case INDEX_op_add_i32:
++        tcg_out_ternary_gadget(s, gadget_add_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_sub_i32:
+-        tcg_out_ternary_gadget(s, gadget_sub_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_sub_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_mul_i32:
+-        tcg_out_ternary_gadget(s, gadget_mul_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_mul_i32, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_nand_i32:     /* Optional (TCG_TARGET_HAS_nand_i32). */
++        tcg_out_ternary_gadget(s, gadget_nand_i32, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_nor_i32:     /* Optional (TCG_TARGET_HAS_nor_i32). */
++        tcg_out_ternary_gadget(s, gadget_nor_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_and_i32:
+-        tcg_out_ternary_gadget(s, gadget_and_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_and_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_andc_i32:     /* Optional (TCG_TARGET_HAS_andc_i32). */
+-        tcg_out_ternary_gadget(s, gadget_andc_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_andc_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_orc_i32:      /* Optional (TCG_TARGET_HAS_orc_i64). */
+-        tcg_out_ternary_gadget(s, gadget_orc_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_orc_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_eqv_i32:      /* Optional (TCG_TARGET_HAS_orc_i64). */
+-        tcg_out_ternary_gadget(s, gadget_eqv_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_eqv_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_or_i32:
+-        tcg_out_ternary_gadget(s, gadget_or_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_or_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_xor_i32:
+-        tcg_out_ternary_gadget(s, gadget_xor_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_xor_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_shl_i32:
+-        tcg_out_ternary_gadget(s, gadget_shl_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_shl_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_shr_i32:
+-        tcg_out_ternary_gadget(s, gadget_shr_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_shr_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_sar_i32:
+-        tcg_out_ternary_gadget(s, gadget_sar_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_sar_i32, args[0], args[1], args[2]);
+         break;
+ 
+-    //case INDEX_op_rotr_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
+-    //    tcg_out_ternary_gadget(s, gadget_rotr_i32, args[0], args[1], args[2]); 
+-    //    break;
++    case INDEX_op_rotr_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
++        tcg_out_ternary_gadget(s, gadget_rotr_i32, args[0], args[1], args[2]);
++        break;
+ 
+-    //case INDEX_op_rotl_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
+-    //    tcg_out_ternary_gadget(s, gadget_rotl_i32, args[0], args[1], args[2]); 
++    case INDEX_op_rotl_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
++        tcg_out_ternary_gadget(s, gadget_rotl_i32, args[0], args[1], args[2]);
++        break;
+ 
+     case INDEX_op_add_i64:
+-        tcg_out_ternary_gadget(s, gadget_add_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_add_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_sub_i64:
+-        tcg_out_ternary_gadget(s, gadget_sub_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_sub_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_mul_i64:
+-        tcg_out_ternary_gadget(s, gadget_mul_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_mul_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_and_i64:
+-        tcg_out_ternary_gadget(s, gadget_and_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_and_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_andc_i64:     /* Optional (TCG_TARGET_HAS_andc_i64). */
+-        tcg_out_ternary_gadget(s, gadget_andc_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_andc_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_orc_i64:      /* Optional (TCG_TARGET_HAS_orc_i64). */
+-        tcg_out_ternary_gadget(s, gadget_orc_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_orc_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_eqv_i64:      /* Optional (TCG_TARGET_HAS_eqv_i64). */
+-        tcg_out_ternary_gadget(s, gadget_eqv_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_eqv_i64, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_nand_i64:     /* Optional (TCG_TARGET_HAS_nand_i64). */
++        tcg_out_ternary_gadget(s, gadget_nand_i64, args[0], args[1], args[2]);
+         break;
+ 
+-    //case INDEX_op_nand_i64:     /* Optional (TCG_TARGET_HAS_nand_i64). */
+-    //case INDEX_op_nor_i64:      /* Optional (TCG_TARGET_HAS_nor_i64). */
++    case INDEX_op_nor_i64:      /* Optional (TCG_TARGET_HAS_nor_i64). */
++        tcg_out_ternary_gadget(s, gadget_nor_i64, args[0], args[1], args[2]);
++        break;
+ 
+     case INDEX_op_or_i64:
+-        tcg_out_ternary_gadget(s, gadget_or_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_or_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_xor_i64:
+-        tcg_out_ternary_gadget(s, gadget_xor_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_xor_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_shl_i64:
+-        tcg_out_ternary_gadget(s, gadget_shl_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_shl_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_shr_i64:
+-        tcg_out_ternary_gadget(s, gadget_shr_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_shr_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_sar_i64:
+-        tcg_out_ternary_gadget(s, gadget_sar_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_sar_i64, args[0], args[1], args[2]);
+         break;
+ 
+-    //case INDEX_op_rotl_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
+-    //    tcg_out_ternary_gadget(s, gadget_rotl_i64, args[0], args[1], args[2]); 
+-    //    break;
++    case INDEX_op_rotl_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
++        tcg_out_ternary_gadget(s, gadget_rotl_i64, args[0], args[1], args[2]);
++        break;
+ 
+-    //case INDEX_op_rotr_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
+-    //    tcg_out_ternary_gadget(s, gadget_rotr_i64, args[0], args[1], args[2]); 
+-    //    break;
++    case INDEX_op_rotr_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
++        tcg_out_ternary_gadget(s, gadget_rotr_i64, args[0], args[1], args[2]);
++        break;
+ 
+     case INDEX_op_div_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */
+-        tcg_out_ternary_gadget(s, gadget_div_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_div_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_divu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */
+-        tcg_out_ternary_gadget(s, gadget_divu_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_divu_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_rem_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */
+-        tcg_out_ternary_gadget(s, gadget_rem_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_rem_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_remu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */
+-        tcg_out_ternary_gadget(s, gadget_remu_i64, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_remu_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_brcond_i64:
+@@ -898,7 +1066,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         // helps the processor's branch prediction be less "squished", as not every
+         // branch is going throuh the same instruction.
+         tcg_out_ternary_gadget(s, gadget, last_brcond_i64, args[0], args[1]);
+-        last_brcond_i64 = (last_brcond_i64 + 1) % TCG_TARGET_NB_REGS;
++        last_brcond_i64 = (last_brcond_i64 + 1) % TCG_TARGET_GP_REGS;
+ 
+         // Branch target immediate.
+         tcti_out_label(s, arg_label(args[3]));
+@@ -928,6 +1096,14 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         tcg_out_binary_gadget(s, gadget_neg_i64, args[0], args[1]);
+         break;
+ 
++    case INDEX_op_clz_i64:      /* Optional (TCG_TARGET_HAS_clz_i64). */
++        tcg_out_ternary_gadget(s, gadget_clz_i64, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_ctz_i64:      /* Optional (TCG_TARGET_HAS_ctz_i64). */
++        tcg_out_ternary_gadget(s, gadget_ctz_i64, args[0], args[1], args[2]);
++        break;
++
+     case INDEX_op_ext8s_i64:    /* Optional (TCG_TARGET_HAS_ext8s_i64). */
+         tcg_out_binary_gadget(s, gadget_ext8s_i64, args[0], args[1]);
+         break;
+@@ -956,10 +1132,26 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         tcg_out_binary_gadget(s, gadget_ext32u_i64, args[0], args[1]);
+         break;
+ 
++    case INDEX_op_extrl_i64_i32:
++        tcg_out_binary_gadget(s, gadget_extrl, args[0], args[1]);
++        break;
++
++    case INDEX_op_extrh_i64_i32:
++        tcg_out_binary_gadget(s, gadget_extrh, args[0], args[1]);
++        break;
++
+     case INDEX_op_neg_i32:      /* Optional (TCG_TARGET_HAS_neg_i32). */
+         tcg_out_binary_gadget(s, gadget_neg_i32, args[0], args[1]);
+         break;
+ 
++    case INDEX_op_clz_i32:      /* Optional (TCG_TARGET_HAS_clz_i32). */
++        tcg_out_ternary_gadget(s, gadget_clz_i32, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_ctz_i32:      /* Optional (TCG_TARGET_HAS_ctz_i32). */
++        tcg_out_ternary_gadget(s, gadget_ctz_i32, args[0], args[1], args[2]);
++        break;
++
+     case INDEX_op_not_i32:      /* Optional (TCG_TARGET_HAS_not_i32). */
+         tcg_out_binary_gadget(s, gadget_not_i32, args[0], args[1]);
+         break;
+@@ -973,19 +1165,19 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         break;
+ 
+     case INDEX_op_div_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */
+-        tcg_out_ternary_gadget(s, gadget_div_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_div_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_divu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */
+-        tcg_out_ternary_gadget(s, gadget_divu_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_divu_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_rem_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */
+-        tcg_out_ternary_gadget(s, gadget_rem_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_rem_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_remu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */
+-        tcg_out_ternary_gadget(s, gadget_remu_i32, args[0], args[1], args[2]); 
++        tcg_out_ternary_gadget(s, gadget_remu_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_brcond_i32:
+@@ -1014,7 +1206,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         // helps the processor's branch prediction be less "squished", as not every
+         // branch is going throuh the same instruction.
+         tcg_out_ternary_gadget(s, gadget, last_brcond_i32, args[0], args[1]);
+-        last_brcond_i32 = (last_brcond_i32 + 1) % TCG_TARGET_NB_REGS;
++        last_brcond_i32 = (last_brcond_i32 + 1) % TCG_TARGET_GP_REGS;
+ 
+         // Branch target immediate.
+         tcti_out_label(s, arg_label(args[3]));
+@@ -1031,6 +1223,8 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         void *gadget;
+ 
+         switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
++            case -32:  LD_MEMOP_HANDLER(gadget, args[2],  off32_i32, a_bits, s_bits); break;
++            case -48:  LD_MEMOP_HANDLER(gadget, args[2],  off48_i32, a_bits, s_bits); break;
+             case -64:  LD_MEMOP_HANDLER(gadget, args[2],  off64_i32, a_bits, s_bits); break;
+             case -96:  LD_MEMOP_HANDLER(gadget, args[2],  off96_i32, a_bits, s_bits); break;
+             case -128: LD_MEMOP_HANDLER(gadget, args[2], off128_i32, a_bits, s_bits); break;
+@@ -1038,7 +1232,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         }
+ 
+         // Args:
+-        // - an immediate32 encodes our operation index 
++        // - an immediate32 encodes our operation index
+         tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+         tcg_out64(s, args[2]); // TODO: fix encoding to be 4b
+         break;
+@@ -1052,43 +1246,31 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+         void *gadget;
+ 
+-        // Special optimization case: if we have an operation/target of 0x3A, 
+-        // this is a common case. Delegate to our special-case handler.
+-        if (args[2] == 0x3a) {
+-            switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
+-
+-                case -64: 
+-                    gadget = (a_bits >= s_bits) ? 
+-                        gadget_qemu_ld_leq_aligned_mode3a_off64_i64 :
+-                        gadget_qemu_ld_leq_unaligned_mode3a_off64_i64;
+-                    break;
+-                case -96: 
+-                    gadget = (a_bits >= s_bits) ? 
+-                        gadget_qemu_ld_leq_aligned_mode3a_off96_i64 :
+-                        gadget_qemu_ld_leq_unaligned_mode3a_off96_i64;
+-                    break;
+-                case -128: 
+-                    gadget = (a_bits >= s_bits) ? 
+-                        gadget_qemu_ld_leq_aligned_mode3a_off128_i64 :
+-                        gadget_qemu_ld_leq_unaligned_mode3a_off128_i64;
+-                    break;
+-
+-                default: 
+-                    gadget = gadget_qemu_ld_leq_slowpath_mode3a_off0_i64;
+-                    break;
+-            }
++        // Special optimization case: if this is a common operation/target,
++        // delegate to our special-case handler.
++        if (args[2] == 0x02) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], ld_ub, mode02)
+             tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+-        } 
++        } else if (args[2] == 0x32) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], ld_leq, mode32)
++            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
++        } else if(args[2] == 0x3a) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], ld_leq, mode3a)
++            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
++        }
+         // Otherwise, handle the generic case.
+         else {
+             switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
++                case -32:  LD_MEMOP_HANDLER(gadget, args[2],  off32_i64, a_bits, s_bits); break;
++                case -48:  LD_MEMOP_HANDLER(gadget, args[2],  off48_i64, a_bits, s_bits); break;
+                 case -64:  LD_MEMOP_HANDLER(gadget, args[2],  off64_i64, a_bits, s_bits); break;
+                 case -96:  LD_MEMOP_HANDLER(gadget, args[2],  off96_i64, a_bits, s_bits); break;
+                 case -128: LD_MEMOP_HANDLER(gadget, args[2], off128_i64, a_bits, s_bits); break;
+                 default:   LD_MEMOP_LOOKUP(gadget, args[2], slowpath_off0_i64); break;
+             }
++
+             // Args:
+-            // - an immediate32 encodes our operation index 
++            // - an immediate32 encodes our operation index
+             tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+             tcg_out64(s, args[2]); // TODO: fix encoding to be 4b
+         }
+@@ -1105,6 +1287,8 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         void *gadget;
+ 
+         switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
++            case -32:  ST_MEMOP_HANDLER(gadget, args[2],  off32_i32, a_bits, s_bits); break;
++            case -48:  ST_MEMOP_HANDLER(gadget, args[2],  off48_i32, a_bits, s_bits); break;
+             case -64:  ST_MEMOP_HANDLER(gadget, args[2],  off64_i32, a_bits, s_bits); break;
+             case -96:  ST_MEMOP_HANDLER(gadget, args[2],  off96_i32, a_bits, s_bits); break;
+             case -128: ST_MEMOP_HANDLER(gadget, args[2], off128_i32, a_bits, s_bits); break;
+@@ -1113,7 +1297,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+         // Args:
+         // - our gadget encodes the target and address registers
+-        // - an immediate32 encodes our operation index 
++        // - an immediate32 encodes our operation index
+         tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+         tcg_out64(s, args[2]); // FIXME: double encoded
+         break;
+@@ -1127,36 +1311,23 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+         void *gadget;
+ 
+-        // Special optimization case: if we have an operation/target of 0x3A, 
+-        // this is a common case. Delegate to our special-case handler.
+-        if (args[2] == 0x3a) {
+-            switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
+-
+-                case -64: 
+-                    gadget = (a_bits >= s_bits) ? 
+-                        gadget_qemu_st_leq_aligned_mode3a_off64_i64 :
+-                        gadget_qemu_st_leq_unaligned_mode3a_off64_i64;
+-                    break;
+-                case -96: 
+-                    gadget = (a_bits >= s_bits) ? 
+-                        gadget_qemu_st_leq_aligned_mode3a_off96_i64 :
+-                        gadget_qemu_st_leq_unaligned_mode3a_off96_i64;
+-                    break;
+-                case -128: 
+-                    gadget = (a_bits >= s_bits) ? 
+-                        gadget_qemu_st_leq_aligned_mode3a_off128_i64 :
+-                        gadget_qemu_st_leq_unaligned_mode3a_off128_i64;
+-                    break;
+-
+-                default: 
+-                    gadget = gadget_qemu_st_leq_slowpath_mode3a_off0_i64;
+-                    break;
+-            }
++        // Special optimization case: if this is a common operation/target,
++        // delegate to our special-case handler.
++        if (args[2] == 0x02) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], st_ub, mode02)
+             tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+-        } 
++        } else if (args[2] == 0x32) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], st_leq, mode32)
++            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
++        } else if(args[2] == 0x3a) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], st_leq, mode3a)
++            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
++        }
+         // Otherwise, handle the generic case.
+         else {
+             switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
++                case -32:  ST_MEMOP_HANDLER(gadget, args[2],  off32_i64, a_bits, s_bits); break;
++                case -48:  ST_MEMOP_HANDLER(gadget, args[2],  off48_i64, a_bits, s_bits); break;
+                 case -64:  ST_MEMOP_HANDLER(gadget, args[2],  off64_i64, a_bits, s_bits); break;
+                 case -96:  ST_MEMOP_HANDLER(gadget, args[2],  off96_i64, a_bits, s_bits); break;
+                 case -128: ST_MEMOP_HANDLER(gadget, args[2], off128_i64, a_bits, s_bits); break;
+@@ -1165,7 +1336,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+             // Args:
+             // - our gadget encodes the target and address registers
+-            // - an immediate32 encodes our operation index 
++            // - an immediate32 encodes our operation index
+             tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+             tcg_out64(s, args[2]); // FIXME: double encoded
+         }
+@@ -1183,7 +1354,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+             [TCG_MO_LD_ST]                = gadget_mb_ld,
+             [TCG_MO_LD_ST | TCG_MO_LD_LD] = gadget_mb_ld,
+         };
+-        tcg_out_nullary_gadget(s, sync[args[0] & TCG_MO_ALL]);
++        tcg_out_gadget(s, sync[args[0] & TCG_MO_ALL]);
+ 
+         break;
+     }
+@@ -1203,9 +1374,9 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
+                        intptr_t arg2)
+ {
+     if (type == TCG_TYPE_I32) {
+-        tcg_out_ldst_gadget(s, gadget_st_i32, arg, arg1, arg2); 
++        tcg_out_ldst_gadget(s, gadget_st_i32, arg, arg1, arg2);
+     } else {
+-        tcg_out_ldst_gadget(s, gadget_st_i64, arg, arg1, arg2); 
++        tcg_out_ldst_gadget(s, gadget_st_i64, arg, arg1, arg2);
+     }
+ }
+ 
+@@ -1221,19 +1392,629 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+     return ct & TCG_CT_CONST;
+ }
+ 
++/***************************
++ *  TCG Vector Operations  *
++ ***************************/
++
++// Helper for emitting DUPI (immediate DUP) instructions. Selects the _q1 or _q0
++// gadget table according to `q`, then indexes it by [rd][arg]. Expands to a bare
++// if/else, so invoke it only as a full statement (never under an unbraced if).
++#define tcg_out_dupi_gadget(s, name, q, rd, op, cmode, arg) \
++    if (q) { \
++        tcg_out_gadget(s, gadget_ ## name ## _cmode_ ## cmode ## _op ## op ## _q1[rd][arg]); \
++    } else { \
++        tcg_out_gadget(s, gadget_ ## name ## _cmode_ ## cmode ## _op ## op ## _q0[rd][arg]); \
++    }
++
++
++// Helpers for emitting D/Q variant instructions: `is_q` picks gadget_<name>_q
++// over gadget_<name>_d, and `arity` names the tcg_out_<arity>_gadget emitter used.
++// Relies on GNU named variadic arguments (`args...`); expands to a bare if/else.
++#define tcg_out_dq_gadget(s, name, arity, is_q, args...) \
++    if (is_q) { \
++        tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _q, args); \
++    } else { \
++        tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _d, args); \
++    }
++
++#define tcg_out_unary_dq_gadget(s, name, is_q, a) \
++    tcg_out_dq_gadget(s, name, unary, is_q, a) 
++#define tcg_out_binary_dq_gadget(s, name, is_q, a, b) \
++    tcg_out_dq_gadget(s, name, binary, is_q, a, b)
++#define tcg_out_ternary_dq_gadget(s, name, is_q, a, b, c) \
++    tcg_out_dq_gadget(s, name, ternary, is_q, a, b, c)
++
++
++// Helpers for emitting the gadget appropriate for a vector's element size. They
++// read `type` (TCG_TYPE_V64 vs. wider) from the caller's scope instead of taking
++// it as a parameter. MO_64 requires V128; the _no64 variant rejects MO_64 outright.
++#define tcg_out_sized_vector_gadget(s, name, arity, vece, args...) \
++    switch(vece) { \
++        case MO_8: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8b, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _16b, args); \
++            } \
++            break; \
++        case MO_16: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4h, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8h, args); \
++            } \
++            break; \
++        case MO_32: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _2s, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4s, args); \
++            } \
++            break; \
++        case MO_64: \
++            if (type == TCG_TYPE_V128) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _2d, args); \
++            } \
++            else { \
++                g_assert_not_reached(); \
++            } \
++            break;  \
++        default: \
++            g_assert_not_reached(); \
++    } 
++#define tcg_out_sized_vector_gadget_no64(s, name, arity, vece, args...) \
++    switch(vece) { \
++        case MO_8: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8b, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _16b, args); \
++            } \
++            break; \
++        case MO_16: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4h, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8h, args); \
++            } \
++            break; \
++        case MO_32: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _2s, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4s, args); \
++            } \
++            break; \
++        default: \
++            g_assert_not_reached(); \
++    } 
++
++
++#define tcg_out_unary_vector_gadget(s, name, vece, a) \
++    tcg_out_sized_vector_gadget(s, name, unary, vece, a)
++#define tcg_out_binary_vector_gadget(s, name, vece, a, b) \
++    tcg_out_sized_vector_gadget(s, name, binary, vece, a, b)
++#define tcg_out_ternary_vector_gadget(s, name, vece, a, b, c) \
++    tcg_out_sized_vector_gadget(s, name, ternary, vece, a, b, c)
++
++#define tcg_out_ternary_vector_gadget_no64(s, name, vece, a, b, c) \
++    tcg_out_sized_vector_gadget_no64(s, name, ternary, vece, a, b, c)
++
++// Emit the _scalar gadget if is_scalar, else the element-sized vector gadget, with operands (a, b, c) in the order given.
++#define tcg_out_ternary_vector_gadget_with_scalar(s, name, is_scalar, vece, a, b, c) \
++    if (is_scalar) { \
++        tcg_out_ternary_gadget(s, gadget_ ## name ## _scalar, a, b, c); \
++    } else { \
++        tcg_out_ternary_vector_gadget(s, name, vece, a, b, c); \
++    }
++
++
++/* Return true if v16 is a valid 16-bit shifted immediate.  */
++static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
++{
++    if (v16 == (v16 & 0xff)) {
++        *cmode = 0x8;
++        *imm8 = v16 & 0xff;
++        return true;
++    } else if (v16 == (v16 & 0xff00)) {
++        *cmode = 0xa;
++        *imm8 = v16 >> 8;
++        return true;
++    }
++    return false;
++}
++
++/** Core vector operation emission: emits the gadget(s) implementing one vector opcode.
++ *  `vecl` is added to TCG_TYPE_V64 to obtain the vector type; `vece` is the element size (MO_8..MO_64). */
++static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece,
++    const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS])
++{
++    TCGType type = vecl + TCG_TYPE_V64;
++    TCGArg r0, r1, r2, r3, w0, w1, w2, w3;
++
++    // Typing flags for vector operations.
++    bool is_v128 = (type == TCG_TYPE_V128);
++    bool is_scalar = !is_v128 && (vece == MO_64);
++
++    // Argument shortcuts.
++    r0 = args[0];
++    r1 = args[1];
++    r2 = args[2];
++    r3 = args[3];
++
++    // Offset argument shortcuts; offset to convert register numbers to gadget numbers.
++    w0 = args[0] - TCG_REG_V16;
++    w1 = args[1] - TCG_REG_V16;
++    w2 = args[2] - TCG_REG_V16;
++    w3 = args[3] - TCG_REG_V16;
++
++    // Argument shortcuts, as signed.
++    int64_t signed_offset_arg = (int32_t)args[2];
++
++    switch (opc) {
++
++    // Load memory -> vector: followed by a 64-bit offset immediate
++    case INDEX_op_ld_vec:
++        tcg_out_binary_dq_gadget(s, ldr, is_v128, w0, r1);
++        tcg_out64(s, signed_offset_arg);
++        break;
++    
++    // Store vector -> memory: followed by a 64-bit offset immediate
++    case INDEX_op_st_vec:
++        tcg_out_binary_dq_gadget(s, str, is_v128, w0, r1);
++        tcg_out64(s, signed_offset_arg);
++        break;
++
++    // Duplicate memory to all vector elements.
++    case INDEX_op_dupm_vec:
++        // DUPM handles normalization itself; pass arguments raw.
++        tcg_out_dupm_vec(s, type, vece, r0, r1, r2);
++        break;
++
++    case INDEX_op_add_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, add, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_sub_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, sub, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_mul_vec: // optional
++        tcg_out_ternary_vector_gadget_no64(s, mul, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_neg_vec: // optional
++        tcg_out_binary_vector_gadget(s, neg, vece, w0, w1);
++        break;
++
++    case INDEX_op_abs_vec: // optional
++        tcg_out_binary_vector_gadget(s, abs, vece, w0, w1);
++        break;
++
++    case INDEX_op_and_vec: // optional
++        tcg_out_ternary_dq_gadget(s, and, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_or_vec:
++        tcg_out_ternary_dq_gadget(s, or, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_andc_vec:
++        tcg_out_ternary_dq_gadget(s, andc, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_orc_vec: // optional
++        tcg_out_ternary_dq_gadget(s, orc, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_xor_vec:
++        tcg_out_ternary_dq_gadget(s, xor, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_ssadd_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, ssadd, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_sssub_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, sssub, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_usadd_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, usadd, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_ussub_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, ussub, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_smax_vec:
++        tcg_out_ternary_vector_gadget_no64(s, smax, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_smin_vec:
++        tcg_out_ternary_vector_gadget_no64(s, smin, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_umax_vec:
++        tcg_out_ternary_vector_gadget_no64(s, umax, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_umin_vec:
++        tcg_out_ternary_vector_gadget_no64(s, umin, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_not_vec: // optional
++        tcg_out_binary_dq_gadget(s, not, is_v128, w0, w1);
++        break;
++
++    case INDEX_op_shlv_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, shlv, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_aa64_sshl_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, sshl, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_cmp_vec:
++        switch (args[3]) {
++            case TCG_COND_EQ:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmeq, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_NE:   /* no direct gadget: compare equal, then invert the mask */
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmeq, is_scalar, vece, w0, w1, w2);
++                tcg_out_binary_dq_gadget(s, not, is_v128, w0, w0);
++                break;
++            case TCG_COND_GT:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmgt, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_LE:   /* a <= b  <=>  b >= a: swapped signed greater-or-equal */
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmge, is_scalar, vece, w0, w2, w1);
++                break;
++            case TCG_COND_GE:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmge, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_LT:   /* a < b  <=>  b > a: swapped signed greater-than */
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmgt, is_scalar, vece, w0, w2, w1);
++                break;
++            case TCG_COND_GTU:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmhi, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_LEU:  /* a <= b  <=>  b >= a: swapped unsigned higher-or-same */
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmhs, is_scalar, vece, w0, w2, w1);
++                break;
++            case TCG_COND_GEU:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmhs, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_LTU:  /* a < b  <=>  b > a: swapped unsigned higher */
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmhi, is_scalar, vece, w0, w2, w1);
++                break;
++            default:
++                g_assert_not_reached();
++        }
++        break;
++
++    case INDEX_op_bitsel_vec: // optional
++    {
++        if (r0 == r3) {
++            tcg_out_ternary_dq_gadget(s, bit, is_v128, w0, w2, w1);
++        } else if (r0 == r2) {
++            tcg_out_ternary_dq_gadget(s, bif, is_v128, w0, w3, w1);
++        } else {
++            if (r0 != r1) {
++                tcg_out_mov(s, type, r0, r1);
++            }
++            tcg_out_ternary_dq_gadget(s, bsl, is_v128, w0, w2, w3);
++        }
++        break;
++    }
++
++    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
++    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
++    default:
++        g_assert_not_reached();
++    }
++}
++
++/* Reports how this backend handles a vector opcode: 1, -1, a vece-dependent 0/1, or 0 (see the returns below). */
++int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
++{
++    switch (opc) {
++    case INDEX_op_add_vec:
++    case INDEX_op_sub_vec:
++    case INDEX_op_and_vec:
++    case INDEX_op_or_vec:
++    case INDEX_op_xor_vec:
++    case INDEX_op_andc_vec:
++    case INDEX_op_orc_vec:
++    case INDEX_op_neg_vec:
++    case INDEX_op_abs_vec:
++    case INDEX_op_not_vec:
++    case INDEX_op_cmp_vec:
++    case INDEX_op_ssadd_vec:
++    case INDEX_op_sssub_vec:
++    case INDEX_op_usadd_vec:
++    case INDEX_op_ussub_vec:
++    case INDEX_op_shlv_vec:
++    case INDEX_op_bitsel_vec:
++        return 1;   /* each of these has a direct case in tcg_out_vec_op */
++    case INDEX_op_rotli_vec:
++    case INDEX_op_shrv_vec:
++    case INDEX_op_sarv_vec:
++    case INDEX_op_rotlv_vec:
++    case INDEX_op_rotrv_vec:
++        return -1;  /* presumably "rewrite via tcg_expand_vec_op first"; NOTE(review): confirm rotli_vec is handled there */
++    case INDEX_op_mul_vec:
++    case INDEX_op_smax_vec:
++    case INDEX_op_smin_vec:
++    case INDEX_op_umax_vec:
++    case INDEX_op_umin_vec:
++        return vece < MO_64;    /* emitted through the _no64 gadget macro, which asserts on MO_64 */
++
++    default:
++        return 0;   /* opcode not listed above */
++    }
++}
++
++void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
++                       TCGArg a0, ...)
++{
++    va_list va;
++    TCGv_vec v0, v1, v2, t1, t2, c1;
++    TCGArg a2;
++
++    /* Every expansion below uses the same operands: v0 (from a0), v1, and one further argument a2. */
++    va_start(va, a0);
++    v0 = temp_tcgv_vec(arg_temp(a0));
++    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
++    a2 = va_arg(va, TCGArg);
++    va_end(va);
++
++    switch (opc) {
++    case INDEX_op_shrv_vec:
++    case INDEX_op_sarv_vec:
++        /* Right shifts are negative left shifts for AArch64.  */
++        v2 = temp_tcgv_vec(arg_temp(a2));
++        t1 = tcg_temp_new_vec(type);
++        tcg_gen_neg_vec(vece, t1, v2);
++        opc = (opc == INDEX_op_shrv_vec
++               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
++        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
++        tcg_temp_free_vec(t1);
++        break;
++
++    case INDEX_op_rotlv_vec:
++        v2 = temp_tcgv_vec(arg_temp(a2));
++        t1 = tcg_temp_new_vec(type);
++        c1 = tcg_constant_vec(type, vece, 8 << vece);
++        tcg_gen_sub_vec(vece, t1, v2, c1);
++        /* t1 = v2 - (8 << vece); right shifts are negative left shifts for AArch64, so this shifts v1 right.  */
++        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
++        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
++        tcg_gen_or_vec(vece, v0, v0, t1);
++        tcg_temp_free_vec(t1);
++        break;
++
++    case INDEX_op_rotrv_vec:
++        v2 = temp_tcgv_vec(arg_temp(a2));
++        t1 = tcg_temp_new_vec(type);
++        t2 = tcg_temp_new_vec(type);
++        c1 = tcg_constant_vec(type, vece, 8 << vece);
++        tcg_gen_neg_vec(vece, t1, v2);
++        tcg_gen_sub_vec(vece, t2, c1, v2);
++        /* t1 = -v2 and t2 = (8 << vece) - v2; right shifts are negative left shifts for AArch64.  */
++        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
++        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
++        tcg_gen_or_vec(vece, v0, t1, t2);
++        tcg_temp_free_vec(t1);
++        tcg_temp_free_vec(t2);
++        break;
++
++    default:
++        g_assert_not_reached();
++    }
++}
++
++
++/* Tries to emit a short MOVI/MVNI/ORR gadget sequence for the vector immediate v64; returns false if none applies. */
++static bool tcg_out_optimized_dupi_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, int64_t v64)
++{
++    bool q = (type == TCG_TYPE_V128);
++    int cmode, imm8, i;
++
++    // If we're copying an 8b immediate, we implicitly have a simple gadget for this,
++    // since there are only 256 possible values * 16 registers. Emit a MOVI gadget implicitly.
++    if (vece == MO_8) {
++        imm8 = (uint8_t)v64;
++        tcg_out_dupi_gadget(s, movi, q, rd, 0, e, imm8);
++        return true;
++    }
++
++    // Otherwise, if we have a value that's all 0x00 and 0xFF bytes,
++    // we can use the scalar variant of MOVI (op=1, cmode=e), which handles
++    // that case directly.
++    for (i = imm8 = 0; i < 8; i++) {
++        uint8_t byte = v64 >> (i * 8);
++        if (byte == 0xff) {
++            imm8 |= 1 << i;
++        } else if (byte != 0) {
++            goto fail_bytes;
++        }
++    }
++    tcg_out_dupi_gadget(s, movi, q, rd, 1, e, imm8);
++    return true;
++ fail_bytes:
++
++    // Handle moves of 16-bit elements.
++    if (vece == MO_16) {
++        uint16_t v16 = v64;
++
++        // Check to see if we have a value representable as a MOV imm8, possibly via a shift.
++        if (is_shimm16(v16, &cmode, &imm8)) {
++            // Output the correct instruction CMode for either a regular MOVI (8) or a LSL8 MOVI (a).
++            if (cmode == 0x8) {
++                tcg_out_dupi_gadget(s, movi, q, rd, 0, 8, imm8);
++            } else {
++                tcg_out_dupi_gadget(s, movi, q, rd, 0, a, imm8);
++            }
++            return true;
++        }
++
++        // Check to see if we have a value representable as an inverted MOV imm8, possibly via a shift.
++        if (is_shimm16(~v16, &cmode, &imm8)) {
++            // Output the correct instruction CMode for either a regular MVNI (8) or a LSL8 MVNI (a).
++            if (cmode == 0x8) {
++                tcg_out_dupi_gadget(s, mvni, q, rd, 0, 8, imm8);
++            } else {
++                tcg_out_dupi_gadget(s, mvni, q, rd, 0, a, imm8);
++            }
++            return true;
++        }
++
++        // If we can't perform either of the optimizations, we'll need to do this in two steps.
++        // Normally, we'd emit a gadget for both steps, but in this case that'd result in needing -way-
++        // too many gadgets. We'll emit two, instead.
++        tcg_out_dupi_gadget(s, movi, q, rd, 0, 8, v16 & 0xff);
++        tcg_out_dupi_gadget(s, orr,  q, rd, 0, a, v16 >> 8);
++        return true;
++    }
++
++    // FIXME: implement optimizations for moves of 32-bit elements
++
++     
++    // Try to create optimized 32B moves.
++    //else if (vece == MO_32) {
++    //    uint32_t v32 = v64;
++    //    uint32_t n32 = ~v32;
++
++    //    if (is_shimm32(v32, &cmode, &imm8) ||
++    //        is_soimm32(v32, &cmode, &imm8) ||
++    //        is_fimm32(v32, &cmode, &imm8)) {
++    //        tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
++    //        return;
++    //    }
++    //    if (is_shimm32(n32, &cmode, &imm8) ||
++    //        is_soimm32(n32, &cmode, &imm8)) {
++    //        tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
++    //        return;
++    //    }
++
++    //    //
++    //    // Restrict the set of constants to those we can load with
++    //    // two instructions.  Others we load from the pool.
++    //    //
++    //    i = is_shimm32_pair(v32, &cmode, &imm8);
++    //    if (i) {
++    //        tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
++    //        tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
++    //        return;
++    //    }
++    //    i = is_shimm32_pair(n32, &cmode, &imm8);
++    //    if (i) {
++    //        tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
++    //        tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
++    //        return;
++    //    }
++    //} 
++
++    return false;
++}
++
++
++/* Loads the immediate v64 into vector register rd: a short gadget sequence when one exists, otherwise an inline literal. */
++static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, int64_t v64)
++{
++    // Convert Rd into a simple gadget number.
++    rd = rd - (TCG_REG_V16);
++
++    // First, try to create an optimized implementation, if possible.
++    if (tcg_out_optimized_dupi_vec(s, type, vece, rd, v64)) {
++        return;
++    }
++
++    // If we didn't, we'll need to load the full vector from memory.
++    // Emit it into our bytecode stream as an immediate; which we'll then
++    // load inside the gadget.
++    if (type == TCG_TYPE_V128) {
++        tcg_out_unary_gadget(s, gadget_ldi_q, rd);
++        tcg_out64(s, v64); // The same 64-bit pattern is emitted twice for the 128-bit case.
++        tcg_out64(s, v64);
++    } else {
++        tcg_out_unary_gadget(s, gadget_ldi_d, rd);
++        tcg_out64(s, v64);
++    }
++}
++
++
++/* Emits a DUP gadget for (vece, rd, rs) to load register rs into vector register rd; always returns true. */
++static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, TCGReg rs)
++{
++    // Compute the gadget index for the relevant vector register.
++    TCGReg wd = rd - (TCG_REG_V16);
++
++    // Emit a DUP gadget to handle the operation; rs is passed through unadjusted.
++    tcg_out_binary_vector_gadget(s, dup, vece, wd, rs);
++    return true;
++}
++
++static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg r, TCGReg base, intptr_t offset)
++{
++    int64_t extended_offset = (int32_t)offset; // NOTE(review): keeps only the low 32 bits of offset; confirm offsets always fit.
++    // Emits a DUPM gadget for (vece, r, base), followed by the offset as a 64-bit immediate; always returns true.
++    // Convert the register into a simple register number for our gadgets.
++    r = r - TCG_REG_V16;
++
++    // Emit a DUPM gadget...
++    tcg_out_binary_vector_gadget(s, dupm, vece, r, base);
++
++    // ... and emit its int64 immediate offset.
++    tcg_out64(s, extended_offset);
++
++    return true;
++}
++
++
++/********************************
++ *  TCG Runtime & Platform Def  *
++ *******************************/
++
+ static void tcg_target_init(TCGContext *s)
+ {
+     /* The current code uses uint8_t for tcg operations. */
+     tcg_debug_assert(tcg_op_defs_max <= UINT8_MAX);
+ 
+-    /* Registers available for 32 bit operations. */
+-    tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1;
+-    /* Registers available for 64 bit operations. */
+-    tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1;
+-
+-    /* TODO: Which registers should be set here? */
+-    tcg_target_call_clobber_regs = BIT(TCG_TARGET_NB_REGS) - 1;
++    // Registers available for each type of operation.
++    tcg_target_available_regs[TCG_TYPE_I32]  = TCG_MASK_GP_REGISTERS;
++    tcg_target_available_regs[TCG_TYPE_I64]  = TCG_MASK_GP_REGISTERS;
++    tcg_target_available_regs[TCG_TYPE_V64]  = TCG_MASK_VECTOR_REGISTERS;
++    tcg_target_available_regs[TCG_TYPE_V128] = TCG_MASK_VECTOR_REGISTERS;
++
++    TCGReg unclobbered_registers[] = {
++        // We don't use registers R16+ in our runtime, so we'll not bother protecting them.
++        TCG_REG_R16, TCG_REG_R17, TCG_REG_R18, TCG_REG_R19,
++        TCG_REG_R20, TCG_REG_R21, TCG_REG_R22, TCG_REG_R23,
++        TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
++        TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
++
++        // Per our calling convention.
++        TCG_REG_V8,  TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11,
++        TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
++   };
++
++    // Specify which registers are clobbered during call.
++    tcg_target_call_clobber_regs = -1ull;
++    for (unsigned i = 0; i < ARRAY_SIZE(unclobbered_registers); ++i) {
++        tcg_regset_reset_reg(tcg_target_call_clobber_regs, unclobbered_registers[i]);
++    }
+ 
++    // Specify which local registers we're reserving.
++    //
++    // Note that we only have to specify registers that are used in the runtime,
++    // and so not e.g. the register that contains AREG0, which can never be allocated.
+     s->reserved_regs = 0;
+     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+ 
+@@ -1292,8 +2073,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, const void *v_tb_
+ 
+         : [return_value] "=m" (return_value)
+ 
+-        : [areg0]        "m"  (env), 
+-          [sp_value]     "m"  (sp_value), 
++        : [areg0]        "m"  (env),
++          [sp_value]     "m"  (sp_value),
+           [start_tb_ptr] "m"  (v_tb_ptr),
+           [pc_mirror]    "m"  (pc_mirror)
+ 
+@@ -1318,8 +2099,11 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, const void *v_tb_
+ /* Disassemble TCI bytecode. */
+ int print_insn_tcti(bfd_vma addr, disassemble_info *info)
+ {
++
++#ifdef TCTI_GADGET_RICH_DISASSEMBLY
+     Dl_info symbol_info = {};
+     char symbol_name[48] ;
++#endif
+ 
+     int status;
+     uint64_t block;
+@@ -1331,16 +2115,22 @@ int print_insn_tcti(bfd_vma addr, disassemble_info *info)
+         return -1;
+     }
+ 
++#ifdef TCTI_GADGET_RICH_DISASSEMBLY
+     // Most of our disassembly stream will be gadgets. Try to get their names, for nice output.
+     dladdr((void *)block, &symbol_info);
+ 
+     if(symbol_info.dli_sname != 0) {
+-        strlcpy(symbol_name, symbol_info.dli_sname, 47);
++        strncpy(symbol_name, symbol_info.dli_sname, sizeof(symbol_name));
++        symbol_name[sizeof(symbol_name) - 1] = 0;
+         info->fprintf_func(info->stream, "%s", symbol_name);
+     } else {
+-        info->fprintf_func(info->stream, "%016llx", block);
++        info->fprintf_func(info->stream, "%016lx", block);
+     }
+ 
++#else
++    info->fprintf_func(info->stream, "%016lx", block);
++#endif
++
+     return sizeof(block);
+ }
+ 
+diff --git a/tcg/aarch64-tcti/tcg-target.h b/tcg/aarch64-tcti/tcg-target.h
+index 7eb3bb1c3d..bf4e7e2772 100644
+--- a/tcg/aarch64-tcti/tcg-target.h
++++ b/tcg/aarch64-tcti/tcg-target.h
+@@ -56,8 +56,11 @@
+ // weird psuedo-native bytecode. We'll indicate that we're intepreted.
+ #define TCG_TARGET_INTERPRETER 1
+ 
++// Specify we'll handle direct jumps.
++#define TCG_TARGET_HAS_direct_jump      1
++
+ //
+-// Supported optional instructions.
++// Supported optional scalar instructions.
+ //
+ 
+ // Divs.
+@@ -78,23 +81,35 @@
+ #define TCG_TARGET_HAS_ext16u_i64       1
+ #define TCG_TARGET_HAS_ext32u_i64       1
+ 
+-// Logicals.
++// Register extractions.
++#define TCG_TARGET_HAS_extrl_i64_i32    1
++#define TCG_TARGET_HAS_extrh_i64_i32    1
++
++// Negations.
+ #define TCG_TARGET_HAS_neg_i32          1
+ #define TCG_TARGET_HAS_not_i32          1
+ #define TCG_TARGET_HAS_neg_i64          1
+ #define TCG_TARGET_HAS_not_i64          1
+ 
++// Logicals.
+ #define TCG_TARGET_HAS_andc_i32         1
+ #define TCG_TARGET_HAS_orc_i32          1
+ #define TCG_TARGET_HAS_eqv_i32          1
++#define TCG_TARGET_HAS_rot_i32          1
++#define TCG_TARGET_HAS_nand_i32         1
++#define TCG_TARGET_HAS_nor_i32          1
+ #define TCG_TARGET_HAS_andc_i64         1
+ #define TCG_TARGET_HAS_eqv_i64          1
+ #define TCG_TARGET_HAS_orc_i64          1
++#define TCG_TARGET_HAS_rot_i64          1
++#define TCG_TARGET_HAS_nor_i64          1
++#define TCG_TARGET_HAS_nand_i64         1
+ 
+-// We don't curretly support rotates, since AArch64 lacks ROL.
+-// We'll fix this later.
+-#define TCG_TARGET_HAS_rot_i32          0
+-#define TCG_TARGET_HAS_rot_i64          0
++// Bitwise operations.
++#define TCG_TARGET_HAS_clz_i32          1
++#define TCG_TARGET_HAS_ctz_i32          1
++#define TCG_TARGET_HAS_clz_i64          1
++#define TCG_TARGET_HAS_ctz_i64          1
+ 
+ // Swaps.
+ #define TCG_TARGET_HAS_bswap16_i32      1
+@@ -104,53 +119,58 @@
+ #define TCG_TARGET_HAS_bswap64_i64      1
+ #define TCG_TARGET_HAS_MEMORY_BSWAP     1
+ 
+-// Specify we'll handle direct jumps.
+-#define TCG_TARGET_HAS_direct_jump      1
+-
+ //
+-// Potential TODOs.
++// Supported optional vector instructions.
+ //
+ 
+-// TODO: implement DEPOSIT as BFI.
+-#define TCG_TARGET_HAS_deposit_i32      0
+-#define TCG_TARGET_HAS_deposit_i64      0
+-
+-// TODO: implement EXTRACT as BFX.
+-#define TCG_TARGET_HAS_extract_i32      0
+-#define TCG_TARGET_HAS_sextract_i32     0
+-#define TCG_TARGET_HAS_extract_i64      0
+-#define TCG_TARGET_HAS_sextract_i64     0
+-
+-// TODO: it might be worth writing a gadget for this
+-#define TCG_TARGET_HAS_movcond_i32      0
+-#define TCG_TARGET_HAS_movcond_i64      0
++#define TCG_TARGET_HAS_v64              1
++#define TCG_TARGET_HAS_v128             1
++#define TCG_TARGET_HAS_v256             0
++
++#define TCG_TARGET_HAS_andc_vec         1
++#define TCG_TARGET_HAS_orc_vec          1
++#define TCG_TARGET_HAS_nand_vec         0
++#define TCG_TARGET_HAS_nor_vec          0
++#define TCG_TARGET_HAS_eqv_vec          0
++#define TCG_TARGET_HAS_not_vec          1
++#define TCG_TARGET_HAS_neg_vec          1
++#define TCG_TARGET_HAS_abs_vec          1
++#define TCG_TARGET_HAS_roti_vec         0
++#define TCG_TARGET_HAS_rots_vec         0
++#define TCG_TARGET_HAS_rotv_vec         0
++#define TCG_TARGET_HAS_shi_vec          0
++#define TCG_TARGET_HAS_shs_vec          0
++#define TCG_TARGET_HAS_shv_vec          1
++#define TCG_TARGET_HAS_mul_vec          1
++#define TCG_TARGET_HAS_sat_vec          1
++#define TCG_TARGET_HAS_minmax_vec       1
++#define TCG_TARGET_HAS_bitsel_vec       1
++#define TCG_TARGET_HAS_cmpsel_vec       0
+ 
+ //
+ // Unsupported instructions.
+ //
+ 
+-// ARMv8 doesn't have instructions for NAND/NOR.
+-#define TCG_TARGET_HAS_nand_i32         0
+-#define TCG_TARGET_HAS_nor_i32          0
+-#define TCG_TARGET_HAS_nor_i64          0
+-#define TCG_TARGET_HAS_nand_i64         0
+-
+-// aarch64's CLZ is implemented without a condition, so it
+-#define TCG_TARGET_HAS_clz_i32          0
+-#define TCG_TARGET_HAS_ctz_i32          0
++// There's no direct instruction with which to count the number of ones,
++// so we'll leave this implemented as other instructions.
+ #define TCG_TARGET_HAS_ctpop_i32        0
+-#define TCG_TARGET_HAS_clz_i64          0
+-#define TCG_TARGET_HAS_ctz_i64          0
+ #define TCG_TARGET_HAS_ctpop_i64        0
+ 
+-// We don't have a simple gadget for this, since we're always assuming softmmu.
+-#define TCG_TARGET_HAS_qemu_st8_i32     0
+-
+-// No AArch64 equivalent.a
+-#define TCG_TARGET_HAS_extrl_i64_i32    0
+-#define TCG_TARGET_HAS_extrh_i64_i32    0
++// We don't currently support gadgets with more than three arguments,
++// so we can't yet create movcond, deposit, or extract gadgets.
++#define TCG_TARGET_HAS_movcond_i32      0
++#define TCG_TARGET_HAS_movcond_i64      0
++#define TCG_TARGET_HAS_deposit_i32      0
++#define TCG_TARGET_HAS_deposit_i64      0
++#define TCG_TARGET_HAS_extract_i32      0
++#define TCG_TARGET_HAS_sextract_i32     0
++#define TCG_TARGET_HAS_extract_i64      0
++#define TCG_TARGET_HAS_sextract_i64     0
+ 
+-#define TCG_TARGET_HAS_extract2_i64     0
++// This operation exists specifically to allow us to provide differing register
++// constraints for 8-bit loads and stores. We don't need to do so, so we'll leave
++// this unimplemented, as we gain nothing by it.
++#define TCG_TARGET_HAS_qemu_st8_i32     0
+ 
+ // These should always be zero on our 64B platform.
+ #define TCG_TARGET_HAS_muls2_i64        0
+@@ -166,36 +186,55 @@
+ #define TCG_TARGET_HAS_muls2_i32        0
+ #define TCG_TARGET_HAS_muluh_i32        0
+ #define TCG_TARGET_HAS_mulsh_i32        0
++#define TCG_TARGET_HAS_extract2_i64     0
+ 
+ //
+ // Platform metadata.
+ //
+ 
+ // Number of registers available.
+-// It might make sense to up these, since we can also use x16 -> x25?
+-#define TCG_TARGET_NB_REGS 16
++#define TCG_TARGET_NB_REGS 64
++
++// Number of general purpose registers.
++#define TCG_TARGET_GP_REGS 16
+ 
+ /* List of registers which are used by TCG. */
+ typedef enum {
+-    TCG_REG_R0 = 0,
+-    TCG_REG_R1,
+-    TCG_REG_R2,
+-    TCG_REG_R3,
+-    TCG_REG_R4,
+-    TCG_REG_R5,
+-    TCG_REG_R6,
+-    TCG_REG_R7,
+-    TCG_REG_R8,
+-    TCG_REG_R9,
+-    TCG_REG_R10,
+-    TCG_REG_R11,
+-    TCG_REG_R12,
+-    TCG_REG_R13,
+-    TCG_REG_R14,
+-    TCG_REG_R15,
+-
+-    TCG_AREG0          = TCG_REG_R14,
+-    TCG_REG_CALL_STACK = TCG_REG_R15,
++
++    // General purpose registers.
++    // Note that we name every _host_ register here; but don't 
++    // necessarily use them; that's determined by the allocation order
++    // and the number of registers setting above. These just give us the ability
++    // to refer to these by name.
++    TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
++    TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, TCG_REG_R7,
++    TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
++    TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
++    TCG_REG_R16, TCG_REG_R17, TCG_REG_R18, TCG_REG_R19,
++    TCG_REG_R20, TCG_REG_R21, TCG_REG_R22, TCG_REG_R23,
++    TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
++    TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
++
++    // Register aliases.
++    TCG_AREG0             = TCG_REG_R14,
++    TCG_REG_CALL_STACK    = TCG_REG_R15,
++
++    // Mask that refers to the GP registers.
++    TCG_MASK_GP_REGISTERS = 0xFFFFul, 
++
++    // Vector registers.
++    TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
++    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
++    TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
++    TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
++    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
++    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
++    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
++    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
++
++    // Mask that refers to the vector registers.
++    TCG_MASK_VECTOR_REGISTERS = 0xFFFF000000000000ul, 
++
+ } TCGReg;
+ 
+ // Specify the shape of the stack our runtime will use.
+diff --git a/tcg/aarch64-tcti/tcg-target.opc.h b/tcg/aarch64-tcti/tcg-target.opc.h
+new file mode 100644
+index 0000000000..26bfd9c460
+--- /dev/null
++++ b/tcg/aarch64-tcti/tcg-target.opc.h
+@@ -0,0 +1,14 @@
++/*
++ * Copyright (c) 2019 Linaro
++ *
++ * This work is licensed under the terms of the GNU GPL, version 2 or
++ * (at your option) any later version.
++ *
++ * See the COPYING file in the top-level directory for details.
++ *
++ * Target-specific opcodes for host vector expansion.  These will be
++ * emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
++ * consider these to be UNSPEC with names.
++ */
++
++DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)
+diff --git a/tcg/aarch64-tcti/tcti-gadget-gen.py b/tcg/aarch64-tcti/tcti-gadget-gen.py
+index fa0232fefa..4e127ff8c3 100755
+--- a/tcg/aarch64-tcti/tcti-gadget-gen.py
++++ b/tcg/aarch64-tcti/tcti-gadget-gen.py
+@@ -4,17 +4,10 @@
+ Generates a C-code include file containing 'gadgets' for use by TCTI.
+ """
+ 
++import os
+ import sys
+ import itertools
+ 
+-# Get a handle on the file we'll be working with, and redirect print to it.
+-if len(sys.argv) > 1:
+-    out_file = open(sys.argv[1], "w")
+-
+-    # Hook our print function, so it always outputs to the relevant file.
+-    core_print = print
+-    print = lambda *a, **k : core_print(*a, **k, file=out_file)
+-
+ # Epilogue code follows at the end of each gadget, and handles continuing execution.
+ EPILOGUE = ( 
+     # Load our next gadget address from our bytecode stream, advancing it.
+@@ -32,41 +25,113 @@
+ # Helper that provides each of the AArch64 condition codes of interest.
+ ARCH_CONDITION_CODES = ["eq", "ne", "lt", "ge", "le", "gt", "lo", "hs", "ls", "hi"]
+ 
++# The list of vector size codes supported on this platform.
++VECTOR_SIZES = ['16b', '8b', '4h', '8h', '2s', '4s', '2d']
++
+ # We'll create a variety of gadgets that assume the MMU's TLB is stored at certain
+ # offsets into its structure. These should match the offsets in tcg-target.c.in.
+-QEMU_ALLOWED_MMU_OFFSETS = [ 64, 96, 128 ]
++QEMU_ALLOWED_MMU_OFFSETS = [ 32, 48, 64, 96, 128 ]
+ 
+ # Statistics.
+ gadgets      = 0
+ instructions = 0
+ 
+-def simple(name, *lines):
++# Files to write to.
++current_collection = "basic"
++output_files = {}
++
++# Create a top-level header.
++top_header = open("tcti_gadgets.h", "w")
++print("/* Automatically generated by tcti-gadget-gen.py. Do not edit. */\n", file=top_header)
++
++def _get_output_files():
++    """ Returns the (C file, H file) pair for the current collection, creating it on first use. """
++    if current_collection not in output_files:
++        START_COLLECTION(current_collection)  # Opens the files and records them in output_files.
++    return output_files[current_collection]
++
++
++def START_COLLECTION(name):
++    """ Makes `name` the active collection, creating its output files on first use. """
++
++    global current_collection
++
++    # Always switch the active collection, even when its files were created by an earlier call.
++    current_collection = name
++
++    # If the collection's files already exist, there is nothing more to set up.
++    if name in output_files:
++        return
++
++    # Create the relevant output files
++    new_c_file = open(f"tcti_{name}_gadgets.c", "w")
++    new_h_file = open(f"tcti_{name}_gadgets.h", "w")
++    output_files[name] = (new_c_file, new_h_file)
++
++    # Add the file to our gadget collection.
++    print(f'#include "tcti_{name}_gadgets.h"', file=top_header)
++
++    # Add generated messages to the relevant collection.
++    print("/* Automatically generated by tcti-gadget-gen.py. Do not edit. */\n", file=new_c_file)
++    print("/* Automatically generated by tcti-gadget-gen.py. Do not edit. */\n", file=new_h_file)
++
++    # Start our C file with inclusion of the relevant header.
++    print(f'\n#include "tcti_{name}_gadgets.h"\n', file=new_c_file)
++
++    # Start our H file with a simple pragma-guard, for speed.
++    print('\n#pragma once\n', file=new_h_file)
++    
++
++def simple(name, *lines, export=True):
+     """ Generates a simple gadget that needs no per-register specialization. """
+ 
+     global gadgets, instructions
+ 
+     gadgets += 1
+ 
++    # Fetch the files we'll be using for output.
++    c_file, h_file = _get_output_files()
++
+     # Create our C/ASM framing.
+-    #print(f"__attribute__((naked)) static void gadget_{name}(void)")
+-    print(f"__attribute__((naked)) static void gadget_{name}(void);")
+-    print(f"__attribute__((naked)) static void gadget_{name}(void)")
+-    print("{")
++    if export:
++        print(f"__attribute__((naked)) void gadget_{name}(void);", file=h_file)
++        print(f"__attribute__((naked)) void gadget_{name}(void)", file=c_file)
++    else:
++        print(f"static __attribute__((naked)) void gadget_{name}(void)", file=c_file)
++
++    print("{", file=c_file)
+ 
+     # Add the core gadget
+-    print("\tasm(")
++    print("\tasm(", file=c_file)
+     for line in lines + EPILOGUE:
+-        print(f"\t\t\"{line} \\n\"")
++        print(f"\t\t\"{line} \\n\"", file=c_file)
+         instructions += 1
+-    print("\t);")
++    print("\t);", file=c_file)
+ 
+     # End our framing.
+-    print("}\n")
++    print("}\n", file=c_file)
++
+ 
+ 
+ def with_register_substitutions(name, substitutions, *lines, immediate_range=range(0)):
+     """ Generates a collection of gadgtes with register substitutions. """
+ 
++    def _expand_op1_immediate(num):
++        """ Gets an uncompressed bitfield argument for a given immediate; for NEON instructions.
++
++        Duplicates each bit eight times; converting 0b0100 to 0xFF0000 (leading zero bits are not emitted).
++        """
++
++        # Get the number as a binary string...
++        binstring = bin(num)[2:]
++
++        # ... expand each binary digit into two hex digits (eight bits)...
++        hex_string = binstring.replace('1', 'FF').replace('0', '00') 
++
++        # ... and return out the new constant.
++        return f"0x{hex_string}"
++
++
+     def substitutions_for_letter(letter, number, line):
+         """ Helper that transforms Wd => w1, implementing gadget substitutions. """
+ 
+@@ -74,8 +139,16 @@ def substitutions_for_letter(letter, number, line):
+         line = line.replace(f"X{letter}", f"x{number}")
+         line = line.replace(f"W{letter}", f"w{number}")
+ 
+-        # ... immediate substitutions.
++        # ... vector register substitutions...
++        line = line.replace(f"V{letter}", f"v{number + 16}")
++        line = line.replace(f"D{letter}", f"d{number + 16}")
++        line = line.replace(f"Q{letter}", f"q{number + 16}")
++
++        # ... regular immediate substitutions...
+         line = line.replace(f"I{letter}", f"{number}")
++
++        # ... and compressed immediate substitutions.
++        line = line.replace(f"S{letter}", f"{_expand_op1_immediate(number)}")
+         return line
+ 
+         
+@@ -105,77 +178,94 @@ def substitutions_for_letter(letter, number, line):
+ 
+         # ... and emit the gadget.
+         permutation_id = "_arg".join(str(number) for number in permutation)
+-        simple(f"{name}_arg{permutation_id}", *new_lines)
++        simple(f"{name}_arg{permutation_id}", *new_lines, export=False)
+ 
+ 
+ def with_dnm(name, *lines):
+     """ Generates a collection of gadgets with substitutions for Xd, Xn, and Xm, and equivalents. """
+     with_register_substitutions(name, ("d", "n", "m"), *lines)
+ 
++    # Fetch the files we'll be using for output.
++    c_file, h_file = _get_output_files()
++
++    # Print out an extern.
++    print(f"extern const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}];", file=h_file)
++
+     # Print out an array that contains all of our gadgets, for lookup.
+-    print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}] = ", end="")
+-    print("{")
++    print(f"const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}] = ", end="", file=c_file)
++    print("{", file=c_file)
+ 
+     # D array
+     for d in TCG_REGISTER_NUMBERS:
+-        print("\t{")
++        print("\t{", file=c_file)
+ 
+         # N array
+         for n in TCG_REGISTER_NUMBERS:
+-            print("\t\t{", end="")
++            print("\t\t{", end="", file=c_file)
+ 
+             # M array
+             for m in TCG_REGISTER_NUMBERS:
+-                print(f"gadget_{name}_arg{d}_arg{n}_arg{m}", end=", ")
++                print(f"gadget_{name}_arg{d}_arg{n}_arg{m}", end=", ", file=c_file)
+ 
+-            print("},")
+-        print("\t},")
+-    print("};")
++            print("},", file=c_file)
++        print("\t},", file=c_file)
++    print("};", file=c_file)
+ 
+ 
+ def with_dn_immediate(name, *lines, immediate_range):
+     """ Generates a collection of gadgets with substitutions for Xd, Xn, and Xm, and equivalents. """
+     with_register_substitutions(name, ["d", "n"], *lines, immediate_range=immediate_range)
+ 
++    # Fetch the files we'll be using for output.
++    c_file, h_file = _get_output_files()
++
++    # Print out an extern.
++    print(f"extern const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{len(immediate_range)}];", file=h_file)
++
+     # Print out an array that contains all of our gadgets, for lookup.
+-    print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{len(immediate_range)}] = ", end="")
+-    print("{")
++    print(f"const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{len(immediate_range)}] = ", end="", file=c_file)
++    print("{", file=c_file)
+ 
+     # D array
+     for d in TCG_REGISTER_NUMBERS:
+-        print("\t{")
++        print("\t{", file=c_file)
+ 
+         # N array
+         for n in TCG_REGISTER_NUMBERS:
+-            print("\t\t{", end="")
++            print("\t\t{", end="", file=c_file)
+ 
+             # M array
+             for i in immediate_range:
+-                print(f"gadget_{name}_arg{d}_arg{n}_arg{i}", end=", ")
++                print(f"gadget_{name}_arg{d}_arg{n}_arg{i}", end=", ", file=c_file)
+ 
+-            print("},")
+-        print("\t},")
+-    print("};")
++            print("},", file=c_file)
++        print("\t},", file=c_file)
++    print("};", file=c_file)
+ 
+ 
+ def with_pair(name, substitutions, *lines):
+     """ Generates a collection of gadgets with two subtstitutions."""
+     with_register_substitutions(name, substitutions, *lines)
+ 
++    # Fetch the files we'll be using for output.
++    c_file, h_file = _get_output_files()
++
++    print(f"extern const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}];", file=h_file)
++
+     # Print out an array that contains all of our gadgets, for lookup.
+-    print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}] = ", end="")
+-    print("{")
++    print(f"const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}] = ", end="", file=c_file)
++    print("{", file=c_file)
+ 
+     # N array
+     for a in TCG_REGISTER_NUMBERS:
+-        print("\t\t{", end="")
++        print("\t\t{", end="", file=c_file)
+ 
+         # M array
+         for b in TCG_REGISTER_NUMBERS:
+-            print(f"gadget_{name}_arg{a}_arg{b}", end=", ")
++            print(f"gadget_{name}_arg{a}_arg{b}", end=", ", file=c_file)
+ 
+-        print("},")
+-    print("};")
++        print("},", file=c_file)
++    print("};", file=c_file)
+ 
+ 
+ def math_dnm(name, mnemonic):
+@@ -183,10 +273,10 @@ def math_dnm(name, mnemonic):
+     with_dnm(f'{name}_i32', f"{mnemonic} Wd, Wn, Wm")
+     with_dnm(f'{name}_i64', f"{mnemonic} Xd, Xn, Xm")
+ 
+-def math_dn(name, mnemonic):
++def math_dn(name, mnemonic, source_is_wn=False):
+     """ Equivalent to `with_dn`, but creates a _i32 and _i64 variant. For simple math. """
+     with_dn(f'{name}_i32', f"{mnemonic} Wd, Wn")
+-    with_dn(f'{name}_i64', f"{mnemonic} Xd, Xn")
++    with_dn(f'{name}_i64', f"{mnemonic} Xd, Wn" if source_is_wn else f"{mnemonic} Xd, Xn")
+ 
+ 
+ def with_nm(name, *lines):
+@@ -227,34 +317,44 @@ def with_single(name, substitution, *lines):
+     """ Generates a collection of gadgets with two subtstitutions."""
+     with_register_substitutions(name, (substitution,), *lines)
+ 
++    # Fetch the files we'll be using for output.
++    c_file, h_file = _get_output_files()
++
++    print(f"extern const void* gadget_{name}[{TCG_REGISTER_COUNT}];", file=h_file)
++
+     # Print out an array that contains all of our gadgets, for lookup.
+-    print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}] = ", end="")
+-    print("{")
++    print(f"const void* gadget_{name}[{TCG_REGISTER_COUNT}] = ", end="", file=c_file)
++    print("{", file=c_file)
+ 
+     for n in TCG_REGISTER_NUMBERS:
+-        print(f"gadget_{name}_arg{n}", end=", ")
++        print(f"gadget_{name}_arg{n}", end=", ", file=c_file)
+ 
+-    print("};")
++    print("};", file=c_file)
+ 
+ 
+ def with_d_immediate(name, *lines, immediate_range=range(0)):
+     """ Generates a collection of gadgets with two subtstitutions."""
+     with_register_substitutions(name, ['d'], *lines, immediate_range=immediate_range)
+ 
++    # Fetch the files we'll be using for output.
++    c_file, h_file = _get_output_files()
++
++    print(f"extern void* gadget_{name}[{TCG_REGISTER_COUNT}][{len(immediate_range)}];", file=h_file)
++
+     # Print out an array that contains all of our gadgets, for lookup.
+-    print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}][{len(immediate_range)}] = ", end="")
+-    print("{")
++    print(f"void* gadget_{name}[{TCG_REGISTER_COUNT}][{len(immediate_range)}] = ", end="", file=c_file)
++    print("{", file=c_file)
+ 
+     # D array
+     for a in TCG_REGISTER_NUMBERS:
+-        print("\t\t{", end="")
++        print("\t\t{", end="", file=c_file)
+ 
+         # I array
+         for b in immediate_range:
+-            print(f"gadget_{name}_arg{a}_arg{b}", end=", ")
++            print(f"gadget_{name}_arg{a}_arg{b}", end=", ", file=c_file)
+ 
+-        print("},")
+-    print("};")
++        print("},", file=c_file)
++    print("};", file=c_file)
+ 
+ 
+ 
+@@ -265,31 +365,14 @@ def with_d(name, *lines):
+ 
+ # Assembly code for saving our machine state before entering the C runtime.
+ C_CALL_PROLOGUE = [
+-    # Store our machine state.
+-    "str x25,      [sp, #-16]!",
+     "stp x14, x15, [sp, #-16]!",
+-    "stp x12, x13, [sp, #-16]!",
+-    "stp x10, x11, [sp, #-16]!",
+-    "stp x8,  x9,  [sp, #-16]!",
+-    "stp x6,  x7,  [sp, #-16]!",
+-    "stp x4,  x5,  [sp, #-16]!",
+-    "stp x2,  x3,  [sp, #-16]!",
+-    "stp x0,  x1,  [sp, #-16]!",
+     "stp x28, lr,  [sp, #-16]!",
+ ]
+ 
+ # Assembly code for restoring our machine state after leaving the C runtime.
+ C_CALL_EPILOGUE = [
+-    "ldp x28, lr, [sp], #16",
+-    "ldp x0,  x1, [sp], #16",
+-    "ldp x2,  x3, [sp], #16",
+-    "ldp x4,  x5, [sp], #16",
+-    "ldp x6,  x7, [sp], #16",
+-    "ldp x8,  x9, [sp], #16",
+-    "ldp x10, x11, [sp], #16",
+-    "ldp x12, x13, [sp], #16",
++    "ldp x28, lr,  [sp], #16",
+     "ldp x14, x15, [sp], #16",
+-    "ldr x25,      [sp], #16",
+ ]
+ 
+ 
+@@ -503,11 +586,73 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+             )
+ 
+ 
++
++def vector_dn(name, *lines):
++    """ Creates a set of gadgets for every size of a given vector op. Accepts 'S' as a size placeholder. """
++
++    def do_size_replacement(line, size):
++        line = line.replace(".S", f".{size}")
++        
++        # If this size requires a 64b source register, replace Wn with Xn.
++        if size == "2d":
++            line = line.replace("Wn", "Xn")
++
++        return line
++
++
++    # Create a variant for each size, replacing any placeholders.
++    for size in VECTOR_SIZES:
++        sized_lines = (do_size_replacement(line, size) for line in lines)
++        with_dn(f"{name}_{size}", *sized_lines)
++
++
++def vector_dnm(name, *lines, scalar=None, omit_sizes=()):
++    """ Creates gadgets for every vector size not in `omit_sizes` ('S' is the size placeholder), plus a `_scalar` gadget built from `scalar` (one line or a sequence of lines) when given. """
++
++    def do_size_replacement(line, size):
++        return line.replace(".S", f".{size}")
++
++    # Create a variant for each size, replacing any placeholders.
++    for size in VECTOR_SIZES:
++        if size in omit_sizes:
++            continue
++
++        sized_lines = (do_size_replacement(line, size) for line in lines)
++        with_dnm(f"{name}_{size}", *sized_lines)
++
++    if scalar:
++        # A bare string is a single line; anything else is taken as a sequence of lines, rather than reusing the loop's spent generator.
++        scalar_lines = (scalar,) if isinstance(scalar, str) else tuple(scalar)
++        with_dnm(f"{name}_scalar", *scalar_lines)
++
++
++def vector_math_dnm(name, operation):
++    """ Generates a collection of gadgets for vector math instructions. """
++    vector_dnm(name, f"{operation} Vd.S, Vn.S, Vm.S", scalar=f"{operation} Dd, Dn, Dm")
++
++
++def vector_math_dnm_no64(name, operation):
++    """ Generates a collection of gadgets for vector math instructions, omitting the '2d' (64-bit element) size. """
++    vector_dnm(name, f"{operation} Vd.S, Vn.S, Vm.S", omit_sizes=('2d',))
++
++
++def vector_logic_dn(name, operation):
++    """ Generates a pair of gadgets for vector bitwise logic instructions. """
++    with_dn(f"{name}_d", f"{operation} Vd.8b, Vn.8b")
++    with_dn(f"{name}_q", f"{operation} Vd.16b, Vn.16b")
++
++
++def vector_logic_dnm(name, operation):
++    """ Generates a pair of gadgets for vector bitwise logic instructions. """
++    with_dnm(f"{name}_d", f"{operation} Vd.8b, Vn.8b, Vm.8b")
++    with_dnm(f"{name}_q", f"{operation} Vd.16b, Vn.16b, Vm.16b")
++
++
+ #
+ # Gadget definitions.
+ #
+ 
+-print("/* Automatically generated by tcti-gadget-gen.py. Do not edit. */\n")
++START_COLLECTION("misc")
+ 
+ # Call a C language helper function by address.
+ simple("call",
+@@ -539,6 +684,7 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+     "ldr x28, [x28]"
+ )
+ 
++
+ # Exit from a translation buffer execution.
+ simple("exit_tb",
+ 
+@@ -550,9 +696,18 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+     "ret"
+ )
+ 
++# Memory barriers.
++simple("mb_all", "dmb ish")
++simple("mb_st",  "dmb ishst")
++simple("mb_ld",  "dmb ishld")
++
++
++
+ 
+ for condition in ARCH_CONDITION_CODES:
+ 
++    START_COLLECTION("setcond")
++
+     # Performs a comparison between two operands.
+     with_dnm(f"setcond_i32_{condition}",
+         "subs Wd, Wn, Wm",
+@@ -573,23 +728,20 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+     # branch is funneled throught the same address.
+     #
+ 
++    START_COLLECTION("brcond")
++
+     # Branches iff a given comparison is true.
+     with_dnm(f'brcond_i32_{condition}',
+ 
+         # Grab our immediate argument.
+         "ldr x27, [x28], #8",
+ 
+-        # Perform our comparison and conditional branch.
+-        "subs Wzr, Wn, Wm",
+-        f"b{condition} 1f",
+-
+-        "0:", # not taken
+-           # Perform our end-of-instruction epilogue.
+-            *EPILOGUE,
++        # Perform our comparison...
++        "subs wzr, Wn, Wm",
+ 
+-        "1:" # taken
+-            # Update our bytecode pointer to take the label.
+-            "mov x28, x27"
++        # ... and our conditional branch, which selectively sets x28 (our "gadget pointer")
++        # to the new location, if required.
++        f"csel x28, x27, x28, {condition}"
+     )
+ 
+     # Branches iff a given comparison is true.
+@@ -599,19 +751,17 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+         "ldr x27, [x28], #8",
+ 
+         # Perform our comparison and conditional branch.
+-        "subs Xzr, Xn, Xm",
+-        f"b{condition} 1f",
++        "subs xzr, Xn, Xm",
+ 
+-        "0:", # not taken
+-            # Perform our end-of-instruction epilogue.
+-            *EPILOGUE,
+-
+-        "1:" # taken
+-            # Update our bytecode pointer to take the label.
+-            "mov x28, x27"
++        # ... and our conditional branch, which selectively sets x28 (our "gadget pointer")
++        # to the new location, if required.
++        f"csel x28, x27, x28, {condition}"
+     )
+ 
+ 
++START_COLLECTION("mov")
++
++
+ # MOV variants.
+ with_dn("mov_i32",     "mov Wd, Wn")
+ with_dn("mov_i64",     "mov Xd, Xn")
+@@ -623,17 +773,24 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+ with_d_immediate("movi_imm_i32", "mov Wd, #Ii", immediate_range=range(64))
+ with_d_immediate("movi_imm_i64", "mov Xd, #Ii", immediate_range=range(64))
+ 
++START_COLLECTION("load_unsigned")
++
+ # LOAD variants.
+ # TODO: should the signed variants have X variants for _i64?
+ ldst_dn("ld8u",      "ldrb  Wd, [Xn, x27]")
++ldst_dn("ld16u",     "ldrh  Wd, [Xn, x27]")
++ldst_dn("ld32u",     "ldr   Wd, [Xn, x27]")
++ldst_dn("ld_i64",    "ldr   Xd, [Xn, x27]")
++
++START_COLLECTION("load_signed")
++
+ ldst_dn("ld8s_i32",  "ldrsb Wd, [Xn, x27]")
+ ldst_dn("ld8s_i64",  "ldrsb Xd, [Xn, x27]")
+-ldst_dn("ld16u",     "ldrh  Wd, [Xn, x27]")
+ ldst_dn("ld16s_i32", "ldrsh Wd, [Xn, x27]")
+ ldst_dn("ld16s_i64", "ldrsh Xd, [Xn, x27]")
+-ldst_dn("ld32u",     "ldr   Wd, [Xn, x27]")
+ ldst_dn("ld32s_i64", "ldrsw Xd, [Xn, x27]")
+-ldst_dn("ld_i64",    "ldr   Xd, [Xn, x27]")
++
++START_COLLECTION("store")
+ 
+ # STORE variants.
+ ldst_dn("st8",         "strb  Wd, [Xn, x27]")
+@@ -644,6 +801,8 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+ # QEMU LD/ST are handled in our C runtime rather than with simple gadgets,
+ # as they're nontrivial.
+ 
++START_COLLECTION("arithmetic")
++
+ # Trivial arithmetic.
+ math_dnm("add" , "add" )
+ math_dnm("sub" , "sub" )
+@@ -657,6 +816,8 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+ with_dnm("remu_i32", "udiv w27, Wn, Wm", "msub Wd, w27, Wm, Wn")
+ with_dnm("remu_i64", "udiv x27, Xn, Xm", "msub Xd, x27, Xm, Xn")
+ 
++START_COLLECTION("logical")
++
+ # Trivial logical.
+ math_dn( "not",  "mvn")
+ math_dn( "neg",  "neg")
+@@ -669,71 +830,155 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+ math_dnm("shl",  "lsl")
+ math_dnm("shr",  "lsr")
+ math_dnm("sar",  "asr")
++math_dnm("rotr", "ror")
+ 
+ # AArch64 lacks a Rotate Left; so we instead rotate right by a negative.
+-# TODO: validate this?
+-#math_dnm("rotr", "ror")
+-#with_dnm("rotl_i32", "neg w27, Wm", "ror Wd, Wn, w27")
+-#with_dnm("rotl_i64", "neg x27, Xm", "ror Xd, Xn, x27")
++with_dnm("rotl_i32", "neg w27, Wm", "ror Wd, Wn, w27")
++with_dnm("rotl_i64", "neg w27, Wm", "ror Xd, Xn, x27")
++
++# We'll synthesize several instructions that don't exist; since it's still faster
++# to run these as gadgets.
++with_dnm("nand_i32", "and Wd, Wn, Wm", "mvn Wd, Wd")
++with_dnm("nand_i64", "and Xd, Xn, Xm", "mvn Xd, Xd")
++with_dnm("nor_i32",  "orr Wd, Wn, Wm", "mvn Wd, Wd")
++with_dnm("nor_i64",  "orr Xd, Xn, Xm", "mvn Xd, Xd")
++
++START_COLLECTION("bitwise")
++
++# Count leading zeroes, with a twist: QEMU requires us to provide
++# a default value for when the argument is 0.
++with_dnm("clz_i32",
++
++    # Perform the core CLZ into w26.
++    "clz w26, Wn",
++
++    # Check Wn to see if it was zero
++    "tst Wn, Wn",
++
++    # If it was zero, accept the argument provided in Wm.
++    # Otherwise, accept our result from w26.
++    "csel Wd, Wm, w26, eq"
++)
++with_dnm("clz_i64",
++
++    # Perform the core CLZ into x26.
++    "clz x26, Xn",
++
++    # Check Xn to see if it was zero
++    "tst Xn, Xn",
++
++    # If it was zero, accept the argument provided in Xm.
++    # Otherwise, accept our result from x26.
++    "csel Xd, Xm, x26, eq"
++)
++
++
++# Count trailing zeroes, with a twist: QEMU requires us to provide
++# a default value for when the argument is 0.
++with_dnm("ctz_i32",
++    # Reverse our bits before performing our actual clz.
++    "rbit w26, Wn",
++    "clz w26, w26",
++
++    # Check Wn to see if it was zero
++    "tst Wn, Wn",
++
++    # If it was zero, accept the argument provided in Wm.
++    # Otherwise, accept our result from w26.
++    "csel Wd, Wm, w26, eq"
++)
++with_dnm("ctz_i64",
++
++    # Reverse our bits before performing our actual clz, leaving the result in x26.
++    "rbit x26, Xn",
++    "clz x26, x26",
++
++    # Check Xn to see if it was zero
++    "tst Xn, Xn",
++
++    # If it was zero, accept the argument provided in Xm.
++    # Otherwise, accept our result from x26.
++    "csel Xd, Xm, x26, eq"
++)
++
++
++START_COLLECTION("extension")
+ 
+ # Numeric extension.
+-math_dn("ext8s",      "sxtb")
++math_dn("ext8s",      "sxtb", source_is_wn=True)
+ with_dn("ext8u",      "and Xd, Xn, #0xff")
+-math_dn("ext16s",     "sxth")
++math_dn("ext16s",     "sxth", source_is_wn=True)
+ with_dn("ext16u",     "and Wd, Wn, #0xffff")
+ with_dn("ext32s_i64", "sxtw Xd, Wn")
+-with_dn("ext32u_i64", "and Xd, Xn, #0xffffffff")
++with_dn("ext32u_i64", "mov Wd, Wn")
++
++# Numeric extraction.
++with_dn("extrl",      "mov Wd, Wn")
++with_dn("extrh",      "lsr Xd, Xn, #32")
++
++START_COLLECTION("byteswap")
+ 
+ # Byte swapping.
+ with_dn("bswap16",    "rev w27, Wn", "lsr Wd, w27, #16")
+ with_dn("bswap32",    "rev Wd, Wn")
+ with_dn("bswap64",    "rev Xd, Xn")
+ 
+-# Memory barriers.
+-simple("mb_all", "dmb ish")
+-simple("mb_st",  "dmb ishst")
+-simple("mb_ld",  "dmb ishld")
+ 
+ # Handlers for QEMU_LD, which handles guest <- host loads.
+ for subtype in ('aligned', 'unaligned', 'slowpath'):
+     is_aligned  = (subtype == 'aligned')
+     is_slowpath = (subtype == 'slowpath')
+ 
++    START_COLLECTION(f"qemu_ld_{subtype}_unsigned_le")
++
+     ld_thunk(f"qemu_ld_ub_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu",
+         fastpath_32b=["ldrb Wd, [Xn, x27]"], fastpath_64b=["ldrb Wd, [Xn, x27]"],
+         force_slowpath=is_slowpath,
+     )
+-    ld_thunk(f"qemu_ld_sb_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu_signed",
+-        fastpath_32b=["ldrsb Wd, [Xn, x27]"], fastpath_64b=["ldrsb Xd, [Xn, x27]"],
+-        force_slowpath=is_slowpath,
+-    )
+     ld_thunk(f"qemu_ld_leuw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_lduw_mmu",
+         fastpath_32b=["ldrh Wd, [Xn, x27]"], fastpath_64b=["ldrh Wd, [Xn, x27]"],
+         force_slowpath=is_slowpath,
+     )
+-    ld_thunk(f"qemu_ld_lesw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_lduw_mmu_signed",
+-        fastpath_32b=["ldrsh Wd, [Xn, x27]"], fastpath_64b=["ldrsh Xd, [Xn, x27]"],
+-        force_slowpath=is_slowpath,
+-    )
+     ld_thunk(f"qemu_ld_leul_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldul_mmu",
+         fastpath_32b=["ldr Wd, [Xn, x27]"], fastpath_64b=["ldr Wd, [Xn, x27]"],
+         force_slowpath=is_slowpath,
+     )
++    ld_thunk(f"qemu_ld_leq_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
++        fastpath_32b=["ldr Xd, [Xn, x27]"], fastpath_64b=["ldr Xd, [Xn, x27]"],
++        force_slowpath=is_slowpath,
++    )
++
++    START_COLLECTION(f"qemu_ld_{subtype}_signed_le")
++
++    ld_thunk(f"qemu_ld_sb_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu_signed",
++        fastpath_32b=["ldrsb Wd, [Xn, x27]"], fastpath_64b=["ldrsb Xd, [Xn, x27]"],
++        force_slowpath=is_slowpath,
++    )
++    ld_thunk(f"qemu_ld_lesw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_lduw_mmu_signed",
++        fastpath_32b=["ldrsh Wd, [Xn, x27]"], fastpath_64b=["ldrsh Xd, [Xn, x27]"],
++        force_slowpath=is_slowpath,
++    )
+     ld_thunk(f"qemu_ld_lesl_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldul_mmu_signed",
+         fastpath_32b=["ldrsw Xd, [Xn, x27]"], fastpath_64b=["ldrsw Xd, [Xn, x27]"],
+         force_slowpath=is_slowpath,
+     )
+-    ld_thunk(f"qemu_ld_leq_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
++
++    # Special variant for the most common modes, as a speedup optimization.
++    ld_thunk(f"qemu_ld_ub_{subtype}_mode02", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu",
++        fastpath_32b=["ldrb Wd, [Xn, x27]"], fastpath_64b=["ldrb Wd, [Xn, x27]"],
++        force_slowpath=is_slowpath, immediate=0x02
++    )
++    ld_thunk(f"qemu_ld_leq_{subtype}_mode32", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
+         fastpath_32b=["ldr Xd, [Xn, x27]"], fastpath_64b=["ldr Xd, [Xn, x27]"],
+-        force_slowpath=is_slowpath,
++        force_slowpath=is_slowpath, immediate=0x32
+     )
+-
+-    # Special variant for the most common mode, as a speedup optimization.
+     ld_thunk(f"qemu_ld_leq_{subtype}_mode3a", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
+         fastpath_32b=["ldr Xd, [Xn, x27]"], fastpath_64b=["ldr Xd, [Xn, x27]"],
+         force_slowpath=is_slowpath, immediate=0x3a
+     )
+ 
++    START_COLLECTION(f"qemu_ld_{subtype}_be")
++
+     # For now, leave the rare/big-endian stuff slow-path only.
+     ld_thunk(f"qemu_ld_beuw_{subtype}", None, None, "helper_be_lduw_mmu",         
+             is_aligned=is_aligned, force_slowpath=is_slowpath)
+@@ -747,11 +992,15 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+             is_aligned=is_aligned, force_slowpath=is_slowpath)
+ 
+ 
++
++
+ # Handlers for QEMU_ST, which handles guest -> host stores.
+ for subtype in ('aligned', 'unaligned', 'slowpath'):
+     is_aligned  = (subtype == 'aligned')
+     is_slowpath = (subtype == 'slowpath')
+ 
++    START_COLLECTION(f"qemu_st_{subtype}_le")
++
+     st_thunk(f"qemu_st_ub_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_stb_mmu",
+         fastpath_32b=["strb Wd, [Xn, x27]"], fastpath_64b=["strb Wd, [Xn, x27]"],
+         force_slowpath=is_slowpath,
+@@ -770,11 +1019,21 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+     )
+     
+     # Special optimization for the most common modes.
++    st_thunk(f"qemu_st_ub_{subtype}_mode02", is_aligned=is_aligned, slowpath_helper="helper_ret_stb_mmu",
++        fastpath_32b=["strb Wd, [Xn, x27]"], fastpath_64b=["strb Wd, [Xn, x27]"],
++        force_slowpath=is_slowpath, immediate=0x02
++    )
++    st_thunk(f"qemu_st_leq_{subtype}_mode32", is_aligned=is_aligned, slowpath_helper="helper_le_stq_mmu",
++        fastpath_32b=["str Xd, [Xn, x27]"], fastpath_64b=["str Xd, [Xn, x27]"],
++        force_slowpath=is_slowpath, immediate=0x32
++    )
+     st_thunk(f"qemu_st_leq_{subtype}_mode3a", is_aligned=is_aligned, slowpath_helper="helper_le_stq_mmu",
+         fastpath_32b=["str Xd, [Xn, x27]"], fastpath_64b=["str Xd, [Xn, x27]"],
+         force_slowpath=is_slowpath, immediate=0x3a
+     )
+ 
++    START_COLLECTION(f"qemu_st_{subtype}_be")
++
+     # For now, leave the rare/big-endian stuff slow-path only.
+     st_thunk(f"qemu_st_beuw_{subtype}", None, None, "helper_be_stw_mmu",  
+             is_aligned=is_aligned, force_slowpath=is_slowpath)
+@@ -784,5 +1043,121 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
+             is_aligned=is_aligned, force_slowpath=is_slowpath)
+ 
+ 
++#
++# SIMD/Vector ops
++#
++
++# SIMD MOVI instructions.
++START_COLLECTION(f"simd_base")
++
++# Unoptimized/unoptimizable load of a vector64; grabbing an immediate.
++with_d("ldi_d", "ldr Dd, [x28], #8")
++with_d("ldi_q", "ldr Qd, [x28], #16")
++
++# General purpose reg -> vec rec loads
++vector_dn("dup", "dup Vd.S, Wn")
++
++# move vector -> GP reg
++with_dn("umov_s0", "umov Wd, Vn.s[0]")
++with_dn("umov_d0", "umov Xd, Vn.d[0]")
++
++# mov GP reg -> vector
++with_dn("ins_s0", "ins Vd.s[0], Wn")
++with_dn("ins_d0", "ins Vd.d[0], Xn")
++
++
++# Memory -> vec reg loads.
++# The offset of the load is stored in a 64b immediate.
++
++# Duplicating load.
++# TODO: possibly squish the add into the ld1r, if that's valid?
++vector_dn("dupm", "ldr x27, [x28], #8", "add x27, x27, Xn", "ld1r {Vd.S}, [x27]")
++
++# Direct loads.
++with_dn("ldr_d",  "ldr x27, [x28], #8", "ldr Dd, [Xn, x27]")
++with_dn("ldr_q",  "ldr x27, [x28], #8", "ldr Qd, [Xn, x27]")
++
++# vec -> reg stores.
++# The offset of the stores is stored in a 64b immediate.
++with_dn("str_d",  "ldr x27, [x28], #8", "str Dd, [Xn, x27]")
++with_dn("str_q",  "ldr x27, [x28], #8", "str Qd, [Xn, x27]")
++
++
++START_COLLECTION(f"simd_arithmetic")
++
++vector_math_dnm("add",   "add")
++vector_math_dnm("usadd", "uqadd")
++vector_math_dnm("ssadd", "sqadd")
++vector_math_dnm("sub",   "sub")
++vector_math_dnm("ussub", "uqsub")
++vector_math_dnm("sssub", "sqsub")
++vector_math_dnm_no64("mul",  "mul")
++vector_math_dnm_no64("smax", "smax")
++vector_math_dnm_no64("smin", "smin")
++vector_math_dnm_no64("umax", "umax")
++vector_math_dnm_no64("umin", "umin")
++
++START_COLLECTION(f"simd_logical")
++
++vector_logic_dnm("and",  "and")
++vector_logic_dnm("andc", "bic")
++vector_logic_dnm("or",   "orr")
++vector_logic_dnm("orc",  "orn")
++vector_logic_dnm("xor",  "eor")
++vector_logic_dn( "not",  "not")
++vector_dn("neg", "neg Vd.S, Vn.S")
++vector_dn("abs", "abs Vd.S, Vn.S")
++vector_logic_dnm( "bit",  "bit")
++vector_logic_dnm( "bif",  "bif")
++vector_logic_dnm( "bsl",  "bsl")
++
++vector_math_dnm("shlv", "ushl")
++vector_math_dnm("sshl", "sshl")
++
++vector_dnm("cmeq", "cmeq Vd.S, Vn.S, Vm.S", scalar="cmeq Dd, Dn, Dm")
++vector_dnm("cmgt", "cmgt Vd.S, Vn.S, Vm.S", scalar="cmgt Dd, Dn, Dm")
++vector_dnm("cmge", "cmge Vd.S, Vn.S, Vm.S", scalar="cmge Dd, Dn, Dm")
++vector_dnm("cmhi", "cmhi Vd.S, Vn.S, Vm.S", scalar="cmhi Dd, Dn, Dm")
++vector_dnm("cmhs", "cmhs Vd.S, Vn.S, Vm.S", scalar="cmhs Dd, Dn, Dm")
++
++START_COLLECTION(f"simd_immediate")
++
++# Simple imm8 movs...
++with_d_immediate("movi_cmode_e_op0_q0",  "movi Vd.8b, #Ii",          immediate_range=range(256))
++with_d_immediate("movi_cmode_e_op0_q1",  "movi Vd.16b, #Ii",         immediate_range=range(256))
++
++# ... all 00/FF movs...
++with_d_immediate("movi_cmode_e_op1_q0",  "movi Dd, #Si",             immediate_range=range(256))
++with_d_immediate("movi_cmode_e_op1_q1",  "movi Vd.2d, #Si",          immediate_range=range(256))
++
++# Halfword MOVs.
++with_d_immediate("movi_cmode_8_op0_q0",  "movi Vd.4h, #Ii",         immediate_range=range(256))
++with_d_immediate("movi_cmode_8_op0_q1",  "movi Vd.8h, #Ii",         immediate_range=range(256))
++with_d_immediate("mvni_cmode_8_op0_q0",  "mvni Vd.4h, #Ii",         immediate_range=range(256))
++with_d_immediate("mvni_cmode_8_op0_q1",  "mvni Vd.8h, #Ii",         immediate_range=range(256))
++with_d_immediate("movi_cmode_a_op0_q0",  "movi Vd.4h, #Ii, lsl #8", immediate_range=range(256))
++with_d_immediate("movi_cmode_a_op0_q1",  "movi Vd.8h, #Ii, lsl #8", immediate_range=range(256))
++with_d_immediate("mvni_cmode_a_op0_q0",  "mvni Vd.4h, #Ii, lsl #8", immediate_range=range(256))
++with_d_immediate("mvni_cmode_a_op0_q1",  "mvni Vd.8h, #Ii, lsl #8", immediate_range=range(256))
++
++# Halfword ORIs, for building complex MOVs.
++with_d_immediate("orr_cmode_a_op0_q0",   "orr Vd.4h, #Ii, lsl #8",  immediate_range=range(256))
++with_d_immediate("orr_cmode_a_op0_q1",   "orr Vd.8h, #Ii, lsl #8",  immediate_range=range(256))
++
++
++# Print a list of output files generated.
++output_c_filenames = (f"'tcti_{name}_gadgets.c'" for name in output_files.keys())
++output_h_filenames = (f"'tcti_{name}_gadgets.h'" for name in output_files.keys())
++
++print("Sources generated:",    file=sys.stderr)
++print(f"gadgets = [",          file=sys.stderr)
++print("      tcti_gadgets.h,", file=sys.stderr)
++
++for name in output_files.keys():
++    print(f"      'tcti_{name}_gadgets.c',", file=sys.stderr)
++    print(f"      'tcti_{name}_gadgets.h',", file=sys.stderr)
++
++print(f"]", file=sys.stderr)
++
+ # Statistics.
+-sys.stderr.write(f"\nGenerated {gadgets} gadgets with {instructions} instructions ({instructions * 4} B).\n\n")
++sys.stderr.write(f"\nGenerated {gadgets} gadgets with {instructions} instructions (~{(instructions * 4) // 1024 // 1024} MiB).\n\n")
+diff --git a/util/osdep.c b/util/osdep.c
+index 81c46df6f5..8df113c2df 100644
+--- a/util/osdep.c
++++ b/util/osdep.c
+@@ -114,6 +114,12 @@ int qemu_mprotect_none(void *addr, size_t size)
+ #ifdef _WIN32
+     return qemu_mprotect__osdep(addr, size, PAGE_NOACCESS);
+ #else
++# if defined(__APPLE__) && defined(__arm64__)
++    if (__builtin_available(macOS 11.2, *)) {
++        /* mprotect() in macOS 11.2 can't switch RWX to NONE */
++        return 0;
++    }
++# endif
+     return qemu_mprotect__osdep(addr, size, PROT_NONE);
+ #endif
+ }
+From patchwork Fri Dec 23 08:50:46 2022
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+X-Patchwork-Submitter: Alexander Graf <agraf@csgraf.de>
+X-Patchwork-Id: 13080757
+Return-Path: <qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org>
+X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
+	aws-us-west-2-korg-lkml-1.web.codeaurora.org
+Received: from lists.gnu.org (lists.gnu.org [209.51.188.17])
+	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
+	(No client certificate requested)
+	by smtp.lore.kernel.org (Postfix) with ESMTPS id 07CC3C4167B
+	for <qemu-devel@archiver.kernel.org>; Fri, 23 Dec 2022 08:51:39 +0000 (UTC)
+Received: from localhost ([::1] helo=lists1p.gnu.org)
+	by lists.gnu.org with esmtp (Exim 4.90_1)
+	(envelope-from <qemu-devel-bounces@nongnu.org>)
+	id 1p8dlq-0007Qq-3N; Fri, 23 Dec 2022 03:51:15 -0500
+Received: from eggs.gnu.org ([2001:470:142:3::10])
+ by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)
+ (Exim 4.90_1) (envelope-from <agraf@csgraf.de>)
+ id 1p8dlb-0007K8-Ti; Fri, 23 Dec 2022 03:51:00 -0500
+Received: from mail.csgraf.de ([85.25.223.15] helo=zulu616.server4you.de)
+ by eggs.gnu.org with esmtp (Exim 4.90_1)
+ (envelope-from <agraf@csgraf.de>)
+ id 1p8dlZ-00046m-Nu; Fri, 23 Dec 2022 03:50:59 -0500
+Received: from localhost.localdomain
+ (dynamic-095-118-065-151.95.118.pool.telefonica.de [95.118.65.151])
+ by csgraf.de (Postfix) with ESMTPSA id 0231260804D4;
+ Fri, 23 Dec 2022 09:50:48 +0100 (CET)
+From: Alexander Graf <agraf@csgraf.de>
+To: qemu-devel@nongnu.org
+Cc: Peter Maydell <peter.maydell@linaro.org>, qemu-arm@nongnu.org,
+ Yanan Wang <wangyanan55@huawei.com>,
+ =?utf-8?q?Philippe_Mathieu-Daud=C3=A9?= <philmd@linaro.org>,
+ Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
+ Eduardo Habkost <eduardo@habkost.net>,
+ Shashi Mallela <shashi.mallela@linaro.org>,
+ Eric Auger <eric.auger@redhat.com>, Neil Armstrong <narmstrong@baylibre.com>
+Subject: [PATCH 1/2] hw/intc/arm_gicv3: Make ITT entry size configurable
+Date: Fri, 23 Dec 2022 09:50:46 +0100
+Message-Id: <20221223085047.94832-2-agraf@csgraf.de>
+X-Mailer: git-send-email 2.37.1 (Apple Git-137.1)
+In-Reply-To: <20221223085047.94832-1-agraf@csgraf.de>
+References: <20221223085047.94832-1-agraf@csgraf.de>
+MIME-Version: 1.0
+Received-SPF: pass client-ip=85.25.223.15; envelope-from=agraf@csgraf.de;
+ helo=zulu616.server4you.de
+X-Spam_score_int: -18
+X-Spam_score: -1.9
+X-Spam_bar: -
+X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_NONE=0.001,
+ SPF_PASS=-0.001 autolearn=ham autolearn_force=no
+X-Spam_action: no action
+X-BeenThere: qemu-devel@nongnu.org
+X-Mailman-Version: 2.1.29
+Precedence: list
+List-Id: <qemu-devel.nongnu.org>
+List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
+ <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
+List-Archive: <https://lists.nongnu.org/archive/html/qemu-devel>
+List-Post: <mailto:qemu-devel@nongnu.org>
+List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
+List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
+ <mailto:qemu-devel-request@nongnu.org?subject=subscribe>
+Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
+Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
+
+An ITT entry is opaque to the OS. The only thing it does get told by HW is
+its size. In theory, that size can be any byte aligned number, in practice
+HW will always use power of 2s to simplify offset calculation. We currently
+expose the size as 12, which is not a power of 2.
+
+To prepare for a future where we expose power of 2 sized entry sizes, let's
+make the size itself configurable. We only need to watch out that we don't
+have an entry be smaller than the fields we want to access inside. Bigger
+is always fine.
+
+Signed-off-by: Alexander Graf <agraf@csgraf.de>
+---
+ hw/intc/arm_gicv3_its.c                | 14 +++++++++++---
+ hw/intc/gicv3_internal.h               |  2 +-
+ include/hw/intc/arm_gicv3_its_common.h |  1 +
+ 3 files changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
+index 57c79da5c5..e7cabeb46c 100644
+--- a/hw/intc/arm_gicv3_its.c
++++ b/hw/intc/arm_gicv3_its.c
+@@ -215,7 +215,7 @@ static bool update_ite(GICv3ITSState *s, uint32_t eventid, const DTEntry *dte,
+ {
+     AddressSpace *as = &s->gicv3->dma_as;
+     MemTxResult res = MEMTX_OK;
+-    hwaddr iteaddr = dte->ittaddr + eventid * ITS_ITT_ENTRY_SIZE;
++    hwaddr iteaddr = dte->ittaddr + eventid * s->itt_entry_size;
+     uint64_t itel = 0;
+     uint32_t iteh = 0;
+ 
+@@ -253,7 +253,7 @@ static MemTxResult get_ite(GICv3ITSState *s, uint32_t eventid,
+     MemTxResult res = MEMTX_OK;
+     uint64_t itel;
+     uint32_t iteh;
+-    hwaddr iteaddr = dte->ittaddr + eventid * ITS_ITT_ENTRY_SIZE;
++    hwaddr iteaddr = dte->ittaddr + eventid * s->itt_entry_size;
+ 
+     itel = address_space_ldq_le(as, iteaddr, MEMTXATTRS_UNSPECIFIED, &res);
+     if (res != MEMTX_OK) {
+@@ -1934,6 +1934,12 @@ static void gicv3_arm_its_realize(DeviceState *dev, Error **errp)
+         }
+     }
+ 
++    if (s->itt_entry_size < MIN_ITS_ITT_ENTRY_SIZE) {
++        error_setg(errp, "ITT entry size must be at least %d",
++                   MIN_ITS_ITT_ENTRY_SIZE);
++        return;
++    }
++
+     gicv3_add_its(s->gicv3, dev);
+ 
+     gicv3_its_init_mmio(s, &gicv3_its_control_ops, &gicv3_its_translation_ops);
+@@ -1941,7 +1947,7 @@ static void gicv3_arm_its_realize(DeviceState *dev, Error **errp)
+     /* set the ITS default features supported */
+     s->typer = FIELD_DP64(s->typer, GITS_TYPER, PHYSICAL, 1);
+     s->typer = FIELD_DP64(s->typer, GITS_TYPER, ITT_ENTRY_SIZE,
+-                          ITS_ITT_ENTRY_SIZE - 1);
++                          s->itt_entry_size - 1);
+     s->typer = FIELD_DP64(s->typer, GITS_TYPER, IDBITS, ITS_IDBITS);
+     s->typer = FIELD_DP64(s->typer, GITS_TYPER, DEVBITS, ITS_DEVBITS);
+     s->typer = FIELD_DP64(s->typer, GITS_TYPER, CIL, 1);
+@@ -2008,6 +2014,8 @@ static void gicv3_its_post_load(GICv3ITSState *s)
+ static Property gicv3_its_props[] = {
+     DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3",
+                      GICv3State *),
++    DEFINE_PROP_UINT8("itt-entry-size", GICv3ITSState, itt_entry_size,
++                      MIN_ITS_ITT_ENTRY_SIZE),
+     DEFINE_PROP_END_OF_LIST(),
+ };
+ 
+diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
+index 29d5cdc1b6..2aca1ba095 100644
+--- a/hw/intc/gicv3_internal.h
++++ b/hw/intc/gicv3_internal.h
+@@ -450,7 +450,7 @@ FIELD(VINVALL_1, VPEID, 32, 16)
+  * the value of that field in memory cannot be relied upon -- older
+  * versions of QEMU did not correctly write to that memory.)
+  */
+-#define ITS_ITT_ENTRY_SIZE            0xC
++#define MIN_ITS_ITT_ENTRY_SIZE            0xC
+ 
+ FIELD(ITE_L, VALID, 0, 1)
+ FIELD(ITE_L, INTTYPE, 1, 1)
+diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h
+index a11a0f6654..e730a5482c 100644
+--- a/include/hw/intc/arm_gicv3_its_common.h
++++ b/include/hw/intc/arm_gicv3_its_common.h
+@@ -66,6 +66,7 @@ struct GICv3ITSState {
+     int dev_fd; /* kvm device fd if backed by kvm vgic support */
+     uint64_t gits_translater_gpa;
+     bool translater_gpa_known;
++    uint8_t itt_entry_size;
+ 
+     /* Registers */
+     uint32_t ctlr;
+
+From patchwork Fri Dec 23 08:50:47 2022
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+X-Patchwork-Submitter: Alexander Graf <agraf@csgraf.de>
+X-Patchwork-Id: 13080758
+Return-Path: <qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org>
+X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
+	aws-us-west-2-korg-lkml-1.web.codeaurora.org
+Received: from lists.gnu.org (lists.gnu.org [209.51.188.17])
+	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
+	(No client certificate requested)
+	by smtp.lore.kernel.org (Postfix) with ESMTPS id 07C93C4332F
+	for <qemu-devel@archiver.kernel.org>; Fri, 23 Dec 2022 08:51:39 +0000 (UTC)
+Received: from localhost ([::1] helo=lists1p.gnu.org)
+	by lists.gnu.org with esmtp (Exim 4.90_1)
+	(envelope-from <qemu-devel-bounces@nongnu.org>)
+	id 1p8dlw-0007Sh-C5; Fri, 23 Dec 2022 03:51:20 -0500
+Received: from eggs.gnu.org ([2001:470:142:3::10])
+ by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)
+ (Exim 4.90_1) (envelope-from <agraf@csgraf.de>)
+ id 1p8dle-0007LZ-9W; Fri, 23 Dec 2022 03:51:02 -0500
+Received: from mail.csgraf.de ([85.25.223.15] helo=zulu616.server4you.de)
+ by eggs.gnu.org with esmtp (Exim 4.90_1)
+ (envelope-from <agraf@csgraf.de>)
+ id 1p8dlc-00046r-NI; Fri, 23 Dec 2022 03:51:02 -0500
+Received: from localhost.localdomain
+ (dynamic-095-118-065-151.95.118.pool.telefonica.de [95.118.65.151])
+ by csgraf.de (Postfix) with ESMTPSA id 747226080975;
+ Fri, 23 Dec 2022 09:50:49 +0100 (CET)
+From: Alexander Graf <agraf@csgraf.de>
+To: qemu-devel@nongnu.org
+Cc: Peter Maydell <peter.maydell@linaro.org>, qemu-arm@nongnu.org,
+ Yanan Wang <wangyanan55@huawei.com>,
+ =?utf-8?q?Philippe_Mathieu-Daud=C3=A9?= <philmd@linaro.org>,
+ Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
+ Eduardo Habkost <eduardo@habkost.net>,
+ Shashi Mallela <shashi.mallela@linaro.org>,
+ Eric Auger <eric.auger@redhat.com>, Neil Armstrong <narmstrong@baylibre.com>
+Subject: [PATCH 2/2] hw/intc/arm_gicv3: Bump ITT entry size to 16
+Date: Fri, 23 Dec 2022 09:50:47 +0100
+Message-Id: <20221223085047.94832-3-agraf@csgraf.de>
+X-Mailer: git-send-email 2.37.1 (Apple Git-137.1)
+In-Reply-To: <20221223085047.94832-1-agraf@csgraf.de>
+References: <20221223085047.94832-1-agraf@csgraf.de>
+MIME-Version: 1.0
+Received-SPF: pass client-ip=85.25.223.15; envelope-from=agraf@csgraf.de;
+ helo=zulu616.server4you.de
+X-Spam_score_int: -18
+X-Spam_score: -1.9
+X-Spam_bar: -
+X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_NONE=0.001,
+ SPF_PASS=-0.001 autolearn=ham autolearn_force=no
+X-Spam_action: no action
+X-BeenThere: qemu-devel@nongnu.org
+X-Mailman-Version: 2.1.29
+Precedence: list
+List-Id: <qemu-devel.nongnu.org>
+List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
+ <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
+List-Archive: <https://lists.nongnu.org/archive/html/qemu-devel>
+List-Post: <mailto:qemu-devel@nongnu.org>
+List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
+List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
+ <mailto:qemu-devel-request@nongnu.org?subject=subscribe>
+Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
+Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
+
+Some Operating Systems (like Windows) can only deal with ITT entry sizes
+that are a power of 2. While the spec allows arbitrarily sized ITT entry
+sizes, in practice all hardware will use power of 2 because that
+simplifies offset calculation and ensures that a power of 2 sized region
+can hold a set of entries without gap at the end.
+
+So let's just bump the entry size to 16. That gives us enough space for
+the 12 bytes of data that we want to have in each ITT entry and makes
+QEMU look a bit more like real hardware.
+
+Signed-off-by: Alexander Graf <agraf@csgraf.de>
+---
+ hw/core/machine.c       | 4 +++-
+ hw/intc/arm_gicv3_its.c | 3 +--
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/hw/core/machine.c b/hw/core/machine.c
+index 8d34caa31d..c81b3810c2 100644
+--- a/hw/core/machine.c
++++ b/hw/core/machine.c
+@@ -42,6 +42,7 @@
+ 
+ GlobalProperty hw_compat_7_1[] = {
+     { "virtio-device", "queue_reset", "false" },
++    { "arm-gicv3-its", "itt-entry-size", "12" },
+ };
+ const size_t hw_compat_7_1_len = G_N_ELEMENTS(hw_compat_7_1);
+ 
+diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
+index e7cabeb46c..6754523321 100644
+--- a/hw/intc/arm_gicv3_its.c
++++ b/hw/intc/arm_gicv3_its.c
+@@ -2014,8 +2014,7 @@ static void gicv3_its_post_load(GICv3ITSState *s)
+ static Property gicv3_its_props[] = {
+     DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3",
+                      GICv3State *),
+-    DEFINE_PROP_UINT8("itt-entry-size", GICv3ITSState, itt_entry_size,
+-                      MIN_ITS_ITT_ENTRY_SIZE),
++    DEFINE_PROP_UINT8("itt-entry-size", GICv3ITSState, itt_entry_size, 16),
+     DEFINE_PROP_END_OF_LIST(),
+ };
+ 
+From patchwork Mon Dec 19 22:08:08 2022
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+X-Patchwork-Submitter: Alexander Graf <agraf@csgraf.de>
+X-Patchwork-Id: 13077199
+Return-Path: <qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org>
+X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
+	aws-us-west-2-korg-lkml-1.web.codeaurora.org
+Received: from lists.gnu.org (lists.gnu.org [209.51.188.17])
+	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
+	(No client certificate requested)
+	by smtp.lore.kernel.org (Postfix) with ESMTPS id A8832C4332F
+	for <qemu-devel@archiver.kernel.org>; Mon, 19 Dec 2022 22:09:07 +0000 (UTC)
+Received: from localhost ([::1] helo=lists1p.gnu.org)
+	by lists.gnu.org with esmtp (Exim 4.90_1)
+	(envelope-from <qemu-devel-bounces@nongnu.org>)
+	id 1p7OJ4-0007i9-Cx; Mon, 19 Dec 2022 17:08:22 -0500
+Received: from eggs.gnu.org ([2001:470:142:3::10])
+ by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)
+ (Exim 4.90_1) (envelope-from <agraf@csgraf.de>)
+ id 1p7OIz-0007hY-J0; Mon, 19 Dec 2022 17:08:20 -0500
+Received: from mail.csgraf.de ([85.25.223.15] helo=zulu616.server4you.de)
+ by eggs.gnu.org with esmtp (Exim 4.90_1)
+ (envelope-from <agraf@csgraf.de>)
+ id 1p7OIv-0003o3-7R; Mon, 19 Dec 2022 17:08:15 -0500
+Received: from localhost.localdomain
+ (dynamic-077-002-090-134.77.2.pool.telefonica.de [77.2.90.134])
+ by csgraf.de (Postfix) with ESMTPSA id D1CBF60806FC;
+ Mon, 19 Dec 2022 23:08:09 +0100 (CET)
+From: Alexander Graf <agraf@csgraf.de>
+To: qemu-devel@nongnu.org
+Cc: Peter Maydell <peter.maydell@linaro.org>,
+	qemu-arm@nongnu.org
+Subject: [PATCH] hvf: arm: Add support for GICv3
+Date: Mon, 19 Dec 2022 23:08:08 +0100
+Message-Id: <20221219220808.26392-1-agraf@csgraf.de>
+X-Mailer: git-send-email 2.37.1 (Apple Git-137.1)
+MIME-Version: 1.0
+Received-SPF: pass client-ip=85.25.223.15; envelope-from=agraf@csgraf.de;
+ helo=zulu616.server4you.de
+X-Spam_score_int: -18
+X-Spam_score: -1.9
+X-Spam_bar: -
+X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_NONE=0.001,
+ SPF_PASS=-0.001 autolearn=ham autolearn_force=no
+X-Spam_action: no action
+X-BeenThere: qemu-devel@nongnu.org
+X-Mailman-Version: 2.1.29
+Precedence: list
+List-Id: <qemu-devel.nongnu.org>
+List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
+ <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
+List-Archive: <https://lists.nongnu.org/archive/html/qemu-devel>
+List-Post: <mailto:qemu-devel@nongnu.org>
+List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
+List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
+ <mailto:qemu-devel-request@nongnu.org?subject=subscribe>
+Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
+Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
+
+We currently only support GICv2 emulation. To also support GICv3, we will
+need to pass a few system registers into their respective handler functions.
+
+This patch adds support for HVF to call into the TCG callbacks for GICv3
+system register handlers. This is safe because the GICv3 TCG code is generic
+as long as we limit ourselves to EL0 and EL1 - which are the only modes
+supported by HVF.
+
+To make sure nobody trips over that, we also annotate callbacks that don't
+work in HVF mode, such as EL state change hooks.
+
+With GICv3 support in place, we can run with more than 8 vCPUs.
+
+Signed-off-by: Alexander Graf <agraf@csgraf.de>
+---
+ hw/intc/arm_gicv3_cpuif.c   |   8 +-
+ target/arm/hvf/hvf.c        | 151 ++++++++++++++++++++++++++++++++++++
+ target/arm/hvf/trace-events |   2 +
+ 3 files changed, 160 insertions(+), 1 deletion(-)
+
+diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
+index b17b29288c..b4e387268c 100644
+--- a/hw/intc/arm_gicv3_cpuif.c
++++ b/hw/intc/arm_gicv3_cpuif.c
+@@ -21,6 +21,7 @@
+ #include "hw/irq.h"
+ #include "cpu.h"
+ #include "target/arm/cpregs.h"
++#include "sysemu/tcg.h"
+ 
+ /*
+  * Special case return value from hppvi_index(); must be larger than
+@@ -2810,6 +2811,8 @@ void gicv3_init_cpuif(GICv3State *s)
+          * which case we'd get the wrong value.
+          * So instead we define the regs with no ri->opaque info, and
+          * get back to the GICv3CPUState from the CPUARMState.
++         *
++         * These CP regs callbacks can be called from either TCG or HVF code.
+          */
+         define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
+ 
+@@ -2905,6 +2908,9 @@ void gicv3_init_cpuif(GICv3State *s)
+                 define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo);
+             }
+         }
+-        arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs);
++        if (tcg_enabled()) {
++            /* We can only trap EL changes with TCG for now */
++            arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs);
++        }
+     }
+ }
+diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
+index 060aa0ccf4..8ea4be5f30 100644
+--- a/target/arm/hvf/hvf.c
++++ b/target/arm/hvf/hvf.c
+@@ -80,6 +80,33 @@
+ #define SYSREG_PMCCNTR_EL0    SYSREG(3, 3, 9, 13, 0)
+ #define SYSREG_PMCCFILTR_EL0  SYSREG(3, 3, 14, 15, 7)
+ 
++#define SYSREG_ICC_AP0R0_EL1     SYSREG(3, 0, 12, 8, 4)
++#define SYSREG_ICC_AP0R1_EL1     SYSREG(3, 0, 12, 8, 5)
++#define SYSREG_ICC_AP0R2_EL1     SYSREG(3, 0, 12, 8, 6)
++#define SYSREG_ICC_AP0R3_EL1     SYSREG(3, 0, 12, 8, 7)
++#define SYSREG_ICC_AP1R0_EL1     SYSREG(3, 0, 12, 9, 0)
++#define SYSREG_ICC_AP1R1_EL1     SYSREG(3, 0, 12, 9, 1)
++#define SYSREG_ICC_AP1R2_EL1     SYSREG(3, 0, 12, 9, 2)
++#define SYSREG_ICC_AP1R3_EL1     SYSREG(3, 0, 12, 9, 3)
++#define SYSREG_ICC_ASGI1R_EL1    SYSREG(3, 0, 12, 11, 6)
++#define SYSREG_ICC_BPR0_EL1      SYSREG(3, 0, 12, 8, 3)
++#define SYSREG_ICC_BPR1_EL1      SYSREG(3, 0, 12, 12, 3)
++#define SYSREG_ICC_CTLR_EL1      SYSREG(3, 0, 12, 12, 4)
++#define SYSREG_ICC_DIR_EL1       SYSREG(3, 0, 12, 11, 1)
++#define SYSREG_ICC_EOIR0_EL1     SYSREG(3, 0, 12, 8, 1)
++#define SYSREG_ICC_EOIR1_EL1     SYSREG(3, 0, 12, 12, 1)
++#define SYSREG_ICC_HPPIR0_EL1    SYSREG(3, 0, 12, 8, 2)
++#define SYSREG_ICC_HPPIR1_EL1    SYSREG(3, 0, 12, 12, 2)
++#define SYSREG_ICC_IAR0_EL1      SYSREG(3, 0, 12, 8, 0)
++#define SYSREG_ICC_IAR1_EL1      SYSREG(3, 0, 12, 12, 0)
++#define SYSREG_ICC_IGRPEN0_EL1   SYSREG(3, 0, 12, 12, 6)
++#define SYSREG_ICC_IGRPEN1_EL1   SYSREG(3, 0, 12, 12, 7)
++#define SYSREG_ICC_PMR_EL1       SYSREG(3, 0, 4, 6, 0)
++#define SYSREG_ICC_RPR_EL1       SYSREG(3, 0, 12, 11, 3)
++#define SYSREG_ICC_SGI0R_EL1     SYSREG(3, 0, 12, 11, 7)
++#define SYSREG_ICC_SGI1R_EL1     SYSREG(3, 0, 12, 11, 5)
++#define SYSREG_ICC_SRE_EL1       SYSREG(3, 0, 12, 12, 5)
++
+ #define WFX_IS_WFE (1 << 0)
+ 
+ #define TMR_CTL_ENABLE  (1 << 0)
+@@ -788,6 +815,43 @@ static bool is_id_sysreg(uint32_t reg)
+            SYSREG_CRM(reg) < 8;
+ }
+ 
++static uint32_t hvf_reg2cp_reg(uint32_t reg)
++{
++    return ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
++                              (reg >> 10) & 0xf,
++                              (reg >> 1) & 0xf,
++                              (reg >> 20) & 0x3,
++                              (reg >> 14) & 0x7,
++                              (reg >> 17) & 0x7);
++}
++
++static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val)
++{
++    ARMCPU *arm_cpu = ARM_CPU(cpu);
++    CPUARMState *env = &arm_cpu->env;
++    const ARMCPRegInfo *ri;
++
++    ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
++    if (ri) {
++        if (ri->accessfn) {
++            if (ri->accessfn(env, ri, true) != CP_ACCESS_OK) {
++                return false;
++            }
++        }
++        if (ri->type & ARM_CP_CONST) {
++            *val = ri->resetvalue;
++        } else if (ri->readfn) {
++            *val = ri->readfn(env, ri);
++        } else {
++            *val = CPREG_FIELD64(env, ri);
++        }
++        trace_hvf_vgic_read(ri->name, *val);
++        return true;
++    }
++
++    return false;
++}
++
+ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
+ {
+     ARMCPU *arm_cpu = ARM_CPU(cpu);
+@@ -839,6 +903,36 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
+     case SYSREG_OSDLR_EL1:
+         /* Dummy register */
+         break;
++    case SYSREG_ICC_AP0R0_EL1:
++    case SYSREG_ICC_AP0R1_EL1:
++    case SYSREG_ICC_AP0R2_EL1:
++    case SYSREG_ICC_AP0R3_EL1:
++    case SYSREG_ICC_AP1R0_EL1:
++    case SYSREG_ICC_AP1R1_EL1:
++    case SYSREG_ICC_AP1R2_EL1:
++    case SYSREG_ICC_AP1R3_EL1:
++    case SYSREG_ICC_ASGI1R_EL1:
++    case SYSREG_ICC_BPR0_EL1:
++    case SYSREG_ICC_BPR1_EL1:
++    case SYSREG_ICC_DIR_EL1:
++    case SYSREG_ICC_EOIR0_EL1:
++    case SYSREG_ICC_EOIR1_EL1:
++    case SYSREG_ICC_HPPIR0_EL1:
++    case SYSREG_ICC_HPPIR1_EL1:
++    case SYSREG_ICC_IAR0_EL1:
++    case SYSREG_ICC_IAR1_EL1:
++    case SYSREG_ICC_IGRPEN0_EL1:
++    case SYSREG_ICC_IGRPEN1_EL1:
++    case SYSREG_ICC_PMR_EL1:
++    case SYSREG_ICC_SGI0R_EL1:
++    case SYSREG_ICC_SGI1R_EL1:
++    case SYSREG_ICC_SRE_EL1:
++    case SYSREG_ICC_CTLR_EL1:
++        /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */
++        if (!hvf_sysreg_read_cp(cpu, reg, &val)) {
++            hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
++        }
++        break;
+     default:
+         if (is_id_sysreg(reg)) {
+             /* ID system registers read as RES0 */
+@@ -944,6 +1038,33 @@ static void pmswinc_write(CPUARMState *env, uint64_t value)
+     }
+ }
+ 
++static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val)
++{
++    ARMCPU *arm_cpu = ARM_CPU(cpu);
++    CPUARMState *env = &arm_cpu->env;
++    const ARMCPRegInfo *ri;
++
++    ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
++
++    if (ri) {
++        if (ri->accessfn) {
++            if (ri->accessfn(env, ri, false) != CP_ACCESS_OK) {
++                return false;
++            }
++        }
++        if (ri->writefn) {
++            ri->writefn(env, ri, val);
++        } else {
++            CPREG_FIELD64(env, ri) = val;
++        }
++
++        trace_hvf_vgic_write(ri->name, val);
++        return true;
++    }
++
++    return false;
++}
++
+ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
+ {
+     ARMCPU *arm_cpu = ARM_CPU(cpu);
+@@ -1021,6 +1142,36 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
+     case SYSREG_OSDLR_EL1:
+         /* Dummy register */
+         break;
++    case SYSREG_ICC_AP0R0_EL1:
++    case SYSREG_ICC_AP0R1_EL1:
++    case SYSREG_ICC_AP0R2_EL1:
++    case SYSREG_ICC_AP0R3_EL1:
++    case SYSREG_ICC_AP1R0_EL1:
++    case SYSREG_ICC_AP1R1_EL1:
++    case SYSREG_ICC_AP1R2_EL1:
++    case SYSREG_ICC_AP1R3_EL1:
++    case SYSREG_ICC_ASGI1R_EL1:
++    case SYSREG_ICC_BPR0_EL1:
++    case SYSREG_ICC_BPR1_EL1:
++    case SYSREG_ICC_CTLR_EL1:
++    case SYSREG_ICC_DIR_EL1:
++    case SYSREG_ICC_EOIR0_EL1:
++    case SYSREG_ICC_EOIR1_EL1:
++    case SYSREG_ICC_HPPIR0_EL1:
++    case SYSREG_ICC_HPPIR1_EL1:
++    case SYSREG_ICC_IAR0_EL1:
++    case SYSREG_ICC_IAR1_EL1:
++    case SYSREG_ICC_IGRPEN0_EL1:
++    case SYSREG_ICC_IGRPEN1_EL1:
++    case SYSREG_ICC_PMR_EL1:
++    case SYSREG_ICC_SGI0R_EL1:
++    case SYSREG_ICC_SGI1R_EL1:
++    case SYSREG_ICC_SRE_EL1:
++        /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */
++        if (!hvf_sysreg_write_cp(cpu, reg, val)) {
++            hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
++        }
++        break;
+     default:
+         cpu_synchronize_state(cpu);
+         trace_hvf_unhandled_sysreg_write(env->pc, reg,
+diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events
+index 820e8e0297..4fbbe4b45e 100644
+--- a/target/arm/hvf/trace-events
++++ b/target/arm/hvf/trace-events
+@@ -9,3 +9,5 @@ hvf_unknown_hvc(uint64_t x0) "unknown HVC! 0x%016"PRIx64
+ hvf_unknown_smc(uint64_t x0) "unknown SMC! 0x%016"PRIx64
+ hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]"
+ hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpu=0x%x"
++hvf_vgic_write(const char *name, uint64_t val) "vgic write to %s [val=0x%016"PRIx64"]"
++hvf_vgic_read(const char *name, uint64_t val) "vgic read from %s [val=0x%016"PRIx64"]"

+ 0 - 2760
patches/qemu-8.0.2-utm.patch

@@ -1,2760 +0,0 @@
-From patchwork Fri Dec 23 08:50:46 2022
-Content-Type: text/plain; charset="utf-8"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-X-Patchwork-Submitter: Alexander Graf <agraf@csgraf.de>
-X-Patchwork-Id: 13080757
-Return-Path: <qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org>
-X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
-	aws-us-west-2-korg-lkml-1.web.codeaurora.org
-Received: from lists.gnu.org (lists.gnu.org [209.51.188.17])
-	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
-	(No client certificate requested)
-	by smtp.lore.kernel.org (Postfix) with ESMTPS id 07CC3C4167B
-	for <qemu-devel@archiver.kernel.org>; Fri, 23 Dec 2022 08:51:39 +0000 (UTC)
-Received: from localhost ([::1] helo=lists1p.gnu.org)
-	by lists.gnu.org with esmtp (Exim 4.90_1)
-	(envelope-from <qemu-devel-bounces@nongnu.org>)
-	id 1p8dlq-0007Qq-3N; Fri, 23 Dec 2022 03:51:15 -0500
-Received: from eggs.gnu.org ([2001:470:142:3::10])
- by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)
- (Exim 4.90_1) (envelope-from <agraf@csgraf.de>)
- id 1p8dlb-0007K8-Ti; Fri, 23 Dec 2022 03:51:00 -0500
-Received: from mail.csgraf.de ([85.25.223.15] helo=zulu616.server4you.de)
- by eggs.gnu.org with esmtp (Exim 4.90_1)
- (envelope-from <agraf@csgraf.de>)
- id 1p8dlZ-00046m-Nu; Fri, 23 Dec 2022 03:50:59 -0500
-Received: from localhost.localdomain
- (dynamic-095-118-065-151.95.118.pool.telefonica.de [95.118.65.151])
- by csgraf.de (Postfix) with ESMTPSA id 0231260804D4;
- Fri, 23 Dec 2022 09:50:48 +0100 (CET)
-From: Alexander Graf <agraf@csgraf.de>
-To: qemu-devel@nongnu.org
-Cc: Peter Maydell <peter.maydell@linaro.org>, qemu-arm@nongnu.org,
- Yanan Wang <wangyanan55@huawei.com>,
- =?utf-8?q?Philippe_Mathieu-Daud=C3=A9?= <philmd@linaro.org>,
- Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
- Eduardo Habkost <eduardo@habkost.net>,
- Shashi Mallela <shashi.mallela@linaro.org>,
- Eric Auger <eric.auger@redhat.com>, Neil Armstrong <narmstrong@baylibre.com>
-Subject: [PATCH 1/2] hw/intc/arm_gicv3: Make ITT entry size configurable
-Date: Fri, 23 Dec 2022 09:50:46 +0100
-Message-Id: <20221223085047.94832-2-agraf@csgraf.de>
-X-Mailer: git-send-email 2.37.1 (Apple Git-137.1)
-In-Reply-To: <20221223085047.94832-1-agraf@csgraf.de>
-References: <20221223085047.94832-1-agraf@csgraf.de>
-MIME-Version: 1.0
-Received-SPF: pass client-ip=85.25.223.15; envelope-from=agraf@csgraf.de;
- helo=zulu616.server4you.de
-X-Spam_score_int: -18
-X-Spam_score: -1.9
-X-Spam_bar: -
-X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_NONE=0.001,
- SPF_PASS=-0.001 autolearn=ham autolearn_force=no
-X-Spam_action: no action
-X-BeenThere: qemu-devel@nongnu.org
-X-Mailman-Version: 2.1.29
-Precedence: list
-List-Id: <qemu-devel.nongnu.org>
-List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
- <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
-List-Archive: <https://lists.nongnu.org/archive/html/qemu-devel>
-List-Post: <mailto:qemu-devel@nongnu.org>
-List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
-List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
- <mailto:qemu-devel-request@nongnu.org?subject=subscribe>
-Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
-Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
-
-An ITT entry is opaque to the OS. The only thing it does get told by HW is
-its size. In theory, that size can be any byte aligned number, in practice
-HW will always use power of 2s to simplify offset calculation. We currently
-expose the size as 12, which is not a power of 2.
-
-To prepare for a future where we expose power of 2 sized entry sizes, let's
-make the size itself configurable. We only need to watch out that we don't
-have an entry be smaller than the fields we want to access inside. Bigger
-is always fine.
-
-Signed-off-by: Alexander Graf <agraf@csgraf.de>
----
- hw/intc/arm_gicv3_its.c                | 14 +++++++++++---
- hw/intc/gicv3_internal.h               |  2 +-
- include/hw/intc/arm_gicv3_its_common.h |  1 +
- 3 files changed, 13 insertions(+), 4 deletions(-)
-
-diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
-index 57c79da5c5..e7cabeb46c 100644
---- a/hw/intc/arm_gicv3_its.c
-+++ b/hw/intc/arm_gicv3_its.c
-@@ -215,7 +215,7 @@ static bool update_ite(GICv3ITSState *s, uint32_t eventid, const DTEntry *dte,
- {
-     AddressSpace *as = &s->gicv3->dma_as;
-     MemTxResult res = MEMTX_OK;
--    hwaddr iteaddr = dte->ittaddr + eventid * ITS_ITT_ENTRY_SIZE;
-+    hwaddr iteaddr = dte->ittaddr + eventid * s->itt_entry_size;
-     uint64_t itel = 0;
-     uint32_t iteh = 0;
- 
-@@ -253,7 +253,7 @@ static MemTxResult get_ite(GICv3ITSState *s, uint32_t eventid,
-     MemTxResult res = MEMTX_OK;
-     uint64_t itel;
-     uint32_t iteh;
--    hwaddr iteaddr = dte->ittaddr + eventid * ITS_ITT_ENTRY_SIZE;
-+    hwaddr iteaddr = dte->ittaddr + eventid * s->itt_entry_size;
- 
-     itel = address_space_ldq_le(as, iteaddr, MEMTXATTRS_UNSPECIFIED, &res);
-     if (res != MEMTX_OK) {
-@@ -1934,6 +1934,12 @@ static void gicv3_arm_its_realize(DeviceState *dev, Error **errp)
-         }
-     }
- 
-+    if (s->itt_entry_size < MIN_ITS_ITT_ENTRY_SIZE) {
-+        error_setg(errp, "ITT entry size must be at least %d",
-+                   MIN_ITS_ITT_ENTRY_SIZE);
-+        return;
-+    }
-+
-     gicv3_add_its(s->gicv3, dev);
- 
-     gicv3_its_init_mmio(s, &gicv3_its_control_ops, &gicv3_its_translation_ops);
-@@ -1941,7 +1947,7 @@ static void gicv3_arm_its_realize(DeviceState *dev, Error **errp)
-     /* set the ITS default features supported */
-     s->typer = FIELD_DP64(s->typer, GITS_TYPER, PHYSICAL, 1);
-     s->typer = FIELD_DP64(s->typer, GITS_TYPER, ITT_ENTRY_SIZE,
--                          ITS_ITT_ENTRY_SIZE - 1);
-+                          s->itt_entry_size - 1);
-     s->typer = FIELD_DP64(s->typer, GITS_TYPER, IDBITS, ITS_IDBITS);
-     s->typer = FIELD_DP64(s->typer, GITS_TYPER, DEVBITS, ITS_DEVBITS);
-     s->typer = FIELD_DP64(s->typer, GITS_TYPER, CIL, 1);
-@@ -2008,6 +2014,8 @@ static void gicv3_its_post_load(GICv3ITSState *s)
- static Property gicv3_its_props[] = {
-     DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3",
-                      GICv3State *),
-+    DEFINE_PROP_UINT8("itt-entry-size", GICv3ITSState, itt_entry_size,
-+                      MIN_ITS_ITT_ENTRY_SIZE),
-     DEFINE_PROP_END_OF_LIST(),
- };
- 
-diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
-index 29d5cdc1b6..2aca1ba095 100644
---- a/hw/intc/gicv3_internal.h
-+++ b/hw/intc/gicv3_internal.h
-@@ -450,7 +450,7 @@ FIELD(VINVALL_1, VPEID, 32, 16)
-  * the value of that field in memory cannot be relied upon -- older
-  * versions of QEMU did not correctly write to that memory.)
-  */
--#define ITS_ITT_ENTRY_SIZE            0xC
-+#define MIN_ITS_ITT_ENTRY_SIZE            0xC
- 
- FIELD(ITE_L, VALID, 0, 1)
- FIELD(ITE_L, INTTYPE, 1, 1)
-diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h
-index a11a0f6654..e730a5482c 100644
---- a/include/hw/intc/arm_gicv3_its_common.h
-+++ b/include/hw/intc/arm_gicv3_its_common.h
-@@ -66,6 +66,7 @@ struct GICv3ITSState {
-     int dev_fd; /* kvm device fd if backed by kvm vgic support */
-     uint64_t gits_translater_gpa;
-     bool translater_gpa_known;
-+    uint8_t itt_entry_size;
- 
-     /* Registers */
-     uint32_t ctlr;
-
-From patchwork Fri Dec 23 08:50:47 2022
-Content-Type: text/plain; charset="utf-8"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-X-Patchwork-Submitter: Alexander Graf <agraf@csgraf.de>
-X-Patchwork-Id: 13080758
-Return-Path: <qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org>
-X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
-	aws-us-west-2-korg-lkml-1.web.codeaurora.org
-Received: from lists.gnu.org (lists.gnu.org [209.51.188.17])
-	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
-	(No client certificate requested)
-	by smtp.lore.kernel.org (Postfix) with ESMTPS id 07C93C4332F
-	for <qemu-devel@archiver.kernel.org>; Fri, 23 Dec 2022 08:51:39 +0000 (UTC)
-Received: from localhost ([::1] helo=lists1p.gnu.org)
-	by lists.gnu.org with esmtp (Exim 4.90_1)
-	(envelope-from <qemu-devel-bounces@nongnu.org>)
-	id 1p8dlw-0007Sh-C5; Fri, 23 Dec 2022 03:51:20 -0500
-Received: from eggs.gnu.org ([2001:470:142:3::10])
- by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256)
- (Exim 4.90_1) (envelope-from <agraf@csgraf.de>)
- id 1p8dle-0007LZ-9W; Fri, 23 Dec 2022 03:51:02 -0500
-Received: from mail.csgraf.de ([85.25.223.15] helo=zulu616.server4you.de)
- by eggs.gnu.org with esmtp (Exim 4.90_1)
- (envelope-from <agraf@csgraf.de>)
- id 1p8dlc-00046r-NI; Fri, 23 Dec 2022 03:51:02 -0500
-Received: from localhost.localdomain
- (dynamic-095-118-065-151.95.118.pool.telefonica.de [95.118.65.151])
- by csgraf.de (Postfix) with ESMTPSA id 747226080975;
- Fri, 23 Dec 2022 09:50:49 +0100 (CET)
-From: Alexander Graf <agraf@csgraf.de>
-To: qemu-devel@nongnu.org
-Cc: Peter Maydell <peter.maydell@linaro.org>, qemu-arm@nongnu.org,
- Yanan Wang <wangyanan55@huawei.com>,
- =?utf-8?q?Philippe_Mathieu-Daud=C3=A9?= <philmd@linaro.org>,
- Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
- Eduardo Habkost <eduardo@habkost.net>,
- Shashi Mallela <shashi.mallela@linaro.org>,
- Eric Auger <eric.auger@redhat.com>, Neil Armstrong <narmstrong@baylibre.com>
-Subject: [PATCH 2/2] hw/intc/arm_gicv3: Bump ITT entry size to 16
-Date: Fri, 23 Dec 2022 09:50:47 +0100
-Message-Id: <20221223085047.94832-3-agraf@csgraf.de>
-X-Mailer: git-send-email 2.37.1 (Apple Git-137.1)
-In-Reply-To: <20221223085047.94832-1-agraf@csgraf.de>
-References: <20221223085047.94832-1-agraf@csgraf.de>
-MIME-Version: 1.0
-Received-SPF: pass client-ip=85.25.223.15; envelope-from=agraf@csgraf.de;
- helo=zulu616.server4you.de
-X-Spam_score_int: -18
-X-Spam_score: -1.9
-X-Spam_bar: -
-X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_NONE=0.001,
- SPF_PASS=-0.001 autolearn=ham autolearn_force=no
-X-Spam_action: no action
-X-BeenThere: qemu-devel@nongnu.org
-X-Mailman-Version: 2.1.29
-Precedence: list
-List-Id: <qemu-devel.nongnu.org>
-List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
- <mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
-List-Archive: <https://lists.nongnu.org/archive/html/qemu-devel>
-List-Post: <mailto:qemu-devel@nongnu.org>
-List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
-List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
- <mailto:qemu-devel-request@nongnu.org?subject=subscribe>
-Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
-Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org
-
-Some Operating Systems (like Windows) can only deal with ITT entry sizes
-that are a power of 2. While the spec allows arbitrarily sized ITT entry
-sizes, in practice all hardware will use power of 2 because that
-simplifies offset calculation and ensures that a power of 2 sized region
-can hold a set of entries without gap at the end.
-
-So let's just bump the entry size to 16. That gives us enough space for
-the 12 bytes of data that we want to have in each ITT entry and makes
-QEMU look a bit more like real hardware.
-
-Signed-off-by: Alexander Graf <agraf@csgraf.de>
----
- hw/core/machine.c       | 4 +++-
- hw/intc/arm_gicv3_its.c | 3 +--
- 2 files changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/hw/core/machine.c b/hw/core/machine.c
-index 8d34caa31d..c81b3810c2 100644
---- a/hw/core/machine.c
-+++ b/hw/core/machine.c
-@@ -51,6 +51,7 @@ GlobalProperty hw_compat_7_1[] = {
-     { "virtio-rng-pci", "vectors", "0" },
-     { "virtio-rng-pci-transitional", "vectors", "0" },
-     { "virtio-rng-pci-non-transitional", "vectors", "0" },
-+    { "arm-gicv3-its", "itt-entry-size", "12" },
- };
- const size_t hw_compat_7_1_len = G_N_ELEMENTS(hw_compat_7_1);
- 
-diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
-index e7cabeb46c..6754523321 100644
---- a/hw/intc/arm_gicv3_its.c
-+++ b/hw/intc/arm_gicv3_its.c
-@@ -2014,8 +2014,7 @@ static void gicv3_its_post_load(GICv3ITSState *s)
- static Property gicv3_its_props[] = {
-     DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3",
-                      GICv3State *),
--    DEFINE_PROP_UINT8("itt-entry-size", GICv3ITSState, itt_entry_size,
--                      MIN_ITS_ITT_ENTRY_SIZE),
-+    DEFINE_PROP_UINT8("itt-entry-size", GICv3ITSState, itt_entry_size, 16),
-     DEFINE_PROP_END_OF_LIST(),
- };
- 
-From 8eb68166a76802a18bead9bc966eb97a723eb3a8 Mon Sep 17 00:00:00 2001
-From: osy <50960678+osy@users.noreply.github.com>
-Date: Sat, 8 Jul 2023 14:41:16 -0700
-Subject: [PATCH] edk2: add secure boot variant to aarch64 firmware
-
----
- pc-bios/meson.build                     |   1 +
- 2 files changed, 1 insertion(+)
- create mode 100644 pc-bios/edk2-aarch64-secure-code.fd.bz2
-
-diff --git a/pc-bios/meson.build b/pc-bios/meson.build
-index a7224ef469..e99e7cdda7 100644
---- a/pc-bios/meson.build
-+++ b/pc-bios/meson.build
-@@ -2,6 +2,7 @@ roms = []
- if unpack_edk2_blobs
-   fds = [
-     'edk2-aarch64-code.fd',
-+    'edk2-aarch64-secure-code.fd',
-     'edk2-arm-code.fd',
-     'edk2-arm-vars.fd',
-     'edk2-i386-code.fd',
--- 
-2.39.2 (Apple Git-143)
-
-From f1e0d530d3e2892047031c0f37cab5ed6ca2bc85 Mon Sep 17 00:00:00 2001
-From: osy <osy@turing.llc>
-Date: Sun, 16 Jul 2023 18:13:52 -0700
-Subject: [PATCH 00/12] *** SUBJECT HERE ***
-
-*** BLURB HERE ***
-
-Joelle van Dyne (11):
-  tpm_crb: refactor common code
-  tpm_crb: CTRL_RSP_ADDR is 64-bits wide
-  tpm_ppi: refactor memory space initialization
-  tpm_crb: use a single read-as-mem/write-as-mmio mapping
-  tpm_crb: use the ISA bus
-  tpm_crb: move ACPI table building to device interface
-  hw/arm/virt: add plug handler for TPM on SysBus
-  hw/loongarch/virt: add plug handler for TPM on SysBus
-  tpm_tis_sysbus: fix crash when PPI is enabled
-  tpm_tis_sysbus: move DSDT AML generation to device
-  tpm_crb_sysbus: introduce TPM CRB SysBus device
-
-osy (1):
-  sysbus-fdt: falsely claim TPM CRB device is TPM TIS
-
- docs/specs/tpm.rst          |   2 +
- hw/acpi/aml-build.c         |   7 +-
- hw/arm/Kconfig              |   1 +
- hw/arm/virt-acpi-build.c    |  38 +----
- hw/arm/virt.c               |  37 +++++
- hw/core/sysbus-fdt.c        |   1 +
- hw/i386/acpi-build.c        |  23 ---
- hw/loongarch/acpi-build.c   |  38 +----
- hw/loongarch/virt.c         |  38 +++++
- hw/riscv/Kconfig            |   1 +
- hw/riscv/virt.c             |   1 +
- hw/tpm/Kconfig              |   7 +-
- hw/tpm/meson.build          |   3 +
- hw/tpm/tpm_crb.c            | 307 ++++++++----------------------------
- hw/tpm/tpm_crb.h            |  74 +++++++++
- hw/tpm/tpm_crb_common.c     | 226 ++++++++++++++++++++++++++
- hw/tpm/tpm_crb_sysbus.c     | 178 +++++++++++++++++++++
- hw/tpm/tpm_ppi.c            |   5 +-
- hw/tpm/tpm_ppi.h            |  10 +-
- hw/tpm/tpm_tis_isa.c        |   5 +-
- hw/tpm/tpm_tis_sysbus.c     |  43 +++++
- hw/tpm/trace-events         |   2 +-
- include/hw/acpi/aml-build.h |   1 +
- include/hw/acpi/tpm.h       |   3 +-
- include/sysemu/tpm.h        |   3 +
- tests/qtest/tpm-crb-test.c  |   2 +-
- tests/qtest/tpm-util.c      |   2 +-
- 27 files changed, 704 insertions(+), 354 deletions(-)
- create mode 100644 hw/tpm/tpm_crb.h
- create mode 100644 hw/tpm/tpm_crb_common.c
- create mode 100644 hw/tpm/tpm_crb_sysbus.c
-
--- 
-2.39.2 (Apple Git-143)
-
-From 14b7a453f05de772893f4e19e5c0633f404bd030 Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Mon, 10 Jul 2023 22:52:00 -0700
-Subject: [PATCH 01/12] tpm_crb: refactor common code
-
-In preparation for the SysBus variant, we move common code styled
-after the TPM TIS devices.
-
-To maintain compatibility, we do not rename the existing tpm-crb
-device.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- docs/specs/tpm.rst      |   1 +
- hw/tpm/meson.build      |   1 +
- hw/tpm/tpm_crb.c        | 270 ++++++----------------------------------
- hw/tpm/tpm_crb.h        |  76 +++++++++++
- hw/tpm/tpm_crb_common.c | 218 ++++++++++++++++++++++++++++++++
- hw/tpm/trace-events     |   2 +-
- 6 files changed, 333 insertions(+), 235 deletions(-)
- create mode 100644 hw/tpm/tpm_crb.h
- create mode 100644 hw/tpm/tpm_crb_common.c
-
-diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst
-index 535912a92b..ab79da0ecb 100644
---- a/docs/specs/tpm.rst
-+++ b/docs/specs/tpm.rst
-@@ -41,6 +41,7 @@ operating system.
- 
- QEMU files related to TPM CRB interface:
-  - ``hw/tpm/tpm_crb.c``
-+ - ``hw/tpm/tpm_crb_common.c``
- 
- SPAPR interface
- ---------------
-diff --git a/hw/tpm/meson.build b/hw/tpm/meson.build
-index 7abc2d794a..822d3a11ca 100644
---- a/hw/tpm/meson.build
-+++ b/hw/tpm/meson.build
-@@ -2,6 +2,7 @@ softmmu_ss.add(when: 'CONFIG_TPM_TIS', if_true: files('tpm_tis_common.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_TIS_ISA', if_true: files('tpm_tis_isa.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_TIS_SYSBUS', if_true: files('tpm_tis_sysbus.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_crb.c'))
-+softmmu_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_crb_common.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_TIS', if_true: files('tpm_ppi.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_ppi.c'))
- 
-diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
-index ea930da545..3ef4977fb5 100644
---- a/hw/tpm/tpm_crb.c
-+++ b/hw/tpm/tpm_crb.c
-@@ -31,257 +31,62 @@
- #include "tpm_ppi.h"
- #include "trace.h"
- #include "qom/object.h"
-+#include "tpm_crb.h"
- 
- struct CRBState {
-     DeviceState parent_obj;
- 
--    TPMBackend *tpmbe;
--    TPMBackendCmd cmd;
--    uint32_t regs[TPM_CRB_R_MAX];
--    MemoryRegion mmio;
--    MemoryRegion cmdmem;
--
--    size_t be_buffer_size;
--
--    bool ppi_enabled;
--    TPMPPI ppi;
-+    TPMCRBState state;
- };
- typedef struct CRBState CRBState;
- 
- DECLARE_INSTANCE_CHECKER(CRBState, CRB,
-                          TYPE_TPM_CRB)
- 
--#define CRB_INTF_TYPE_CRB_ACTIVE 0b1
--#define CRB_INTF_VERSION_CRB 0b1
--#define CRB_INTF_CAP_LOCALITY_0_ONLY 0b0
--#define CRB_INTF_CAP_IDLE_FAST 0b0
--#define CRB_INTF_CAP_XFER_SIZE_64 0b11
--#define CRB_INTF_CAP_FIFO_NOT_SUPPORTED 0b0
--#define CRB_INTF_CAP_CRB_SUPPORTED 0b1
--#define CRB_INTF_IF_SELECTOR_CRB 0b1
--
--#define CRB_CTRL_CMD_SIZE (TPM_CRB_ADDR_SIZE - A_CRB_DATA_BUFFER)
--
--enum crb_loc_ctrl {
--    CRB_LOC_CTRL_REQUEST_ACCESS = BIT(0),
--    CRB_LOC_CTRL_RELINQUISH = BIT(1),
--    CRB_LOC_CTRL_SEIZE = BIT(2),
--    CRB_LOC_CTRL_RESET_ESTABLISHMENT_BIT = BIT(3),
--};
--
--enum crb_ctrl_req {
--    CRB_CTRL_REQ_CMD_READY = BIT(0),
--    CRB_CTRL_REQ_GO_IDLE = BIT(1),
--};
--
--enum crb_start {
--    CRB_START_INVOKE = BIT(0),
--};
--
--enum crb_cancel {
--    CRB_CANCEL_INVOKE = BIT(0),
--};
--
--#define TPM_CRB_NO_LOCALITY 0xff
--
--static uint64_t tpm_crb_mmio_read(void *opaque, hwaddr addr,
--                                  unsigned size)
--{
--    CRBState *s = CRB(opaque);
--    void *regs = (void *)&s->regs + (addr & ~3);
--    unsigned offset = addr & 3;
--    uint32_t val = *(uint32_t *)regs >> (8 * offset);
--
--    switch (addr) {
--    case A_CRB_LOC_STATE:
--        val |= !tpm_backend_get_tpm_established_flag(s->tpmbe);
--        break;
--    }
--
--    trace_tpm_crb_mmio_read(addr, size, val);
--
--    return val;
--}
--
--static uint8_t tpm_crb_get_active_locty(CRBState *s)
--{
--    if (!ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, locAssigned)) {
--        return TPM_CRB_NO_LOCALITY;
--    }
--    return ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, activeLocality);
--}
--
--static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
--                               uint64_t val, unsigned size)
--{
--    CRBState *s = CRB(opaque);
--    uint8_t locty =  addr >> 12;
--
--    trace_tpm_crb_mmio_write(addr, size, val);
--
--    switch (addr) {
--    case A_CRB_CTRL_REQ:
--        switch (val) {
--        case CRB_CTRL_REQ_CMD_READY:
--            ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
--                             tpmIdle, 0);
--            break;
--        case CRB_CTRL_REQ_GO_IDLE:
--            ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
--                             tpmIdle, 1);
--            break;
--        }
--        break;
--    case A_CRB_CTRL_CANCEL:
--        if (val == CRB_CANCEL_INVOKE &&
--            s->regs[R_CRB_CTRL_START] & CRB_START_INVOKE) {
--            tpm_backend_cancel_cmd(s->tpmbe);
--        }
--        break;
--    case A_CRB_CTRL_START:
--        if (val == CRB_START_INVOKE &&
--            !(s->regs[R_CRB_CTRL_START] & CRB_START_INVOKE) &&
--            tpm_crb_get_active_locty(s) == locty) {
--            void *mem = memory_region_get_ram_ptr(&s->cmdmem);
--
--            s->regs[R_CRB_CTRL_START] |= CRB_START_INVOKE;
--            s->cmd = (TPMBackendCmd) {
--                .in = mem,
--                .in_len = MIN(tpm_cmd_get_size(mem), s->be_buffer_size),
--                .out = mem,
--                .out_len = s->be_buffer_size,
--            };
--
--            tpm_backend_deliver_request(s->tpmbe, &s->cmd);
--        }
--        break;
--    case A_CRB_LOC_CTRL:
--        switch (val) {
--        case CRB_LOC_CTRL_RESET_ESTABLISHMENT_BIT:
--            /* not loc 3 or 4 */
--            break;
--        case CRB_LOC_CTRL_RELINQUISH:
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
--                             locAssigned, 0);
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
--                             Granted, 0);
--            break;
--        case CRB_LOC_CTRL_REQUEST_ACCESS:
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
--                             Granted, 1);
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
--                             beenSeized, 0);
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
--                             locAssigned, 1);
--            break;
--        }
--        break;
--    }
--}
--
--static const MemoryRegionOps tpm_crb_memory_ops = {
--    .read = tpm_crb_mmio_read,
--    .write = tpm_crb_mmio_write,
--    .endianness = DEVICE_LITTLE_ENDIAN,
--    .valid = {
--        .min_access_size = 1,
--        .max_access_size = 4,
--    },
--};
--
--static void tpm_crb_request_completed(TPMIf *ti, int ret)
-+static void tpm_crb_none_request_completed(TPMIf *ti, int ret)
- {
-     CRBState *s = CRB(ti);
- 
--    s->regs[R_CRB_CTRL_START] &= ~CRB_START_INVOKE;
--    if (ret != 0) {
--        ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
--                         tpmSts, 1); /* fatal error */
--    }
--    memory_region_set_dirty(&s->cmdmem, 0, CRB_CTRL_CMD_SIZE);
-+    tpm_crb_request_completed(&s->state, ret);
- }
- 
--static enum TPMVersion tpm_crb_get_version(TPMIf *ti)
-+static enum TPMVersion tpm_crb_none_get_version(TPMIf *ti)
- {
-     CRBState *s = CRB(ti);
- 
--    return tpm_backend_get_tpm_version(s->tpmbe);
-+    return tpm_crb_get_version(&s->state);
- }
- 
--static int tpm_crb_pre_save(void *opaque)
-+static int tpm_crb_none_pre_save(void *opaque)
- {
-     CRBState *s = opaque;
- 
--    tpm_backend_finish_sync(s->tpmbe);
--
--    return 0;
-+    return tpm_crb_pre_save(&s->state);
- }
- 
--static const VMStateDescription vmstate_tpm_crb = {
-+static const VMStateDescription vmstate_tpm_crb_none = {
-     .name = "tpm-crb",
--    .pre_save = tpm_crb_pre_save,
-+    .pre_save = tpm_crb_none_pre_save,
-     .fields = (VMStateField[]) {
--        VMSTATE_UINT32_ARRAY(regs, CRBState, TPM_CRB_R_MAX),
-+        VMSTATE_UINT32_ARRAY(state.regs, CRBState, TPM_CRB_R_MAX),
-         VMSTATE_END_OF_LIST(),
-     }
- };
- 
--static Property tpm_crb_properties[] = {
--    DEFINE_PROP_TPMBE("tpmdev", CRBState, tpmbe),
--    DEFINE_PROP_BOOL("ppi", CRBState, ppi_enabled, true),
-+static Property tpm_crb_none_properties[] = {
-+    DEFINE_PROP_TPMBE("tpmdev", CRBState, state.tpmbe),
-+    DEFINE_PROP_BOOL("ppi", CRBState, state.ppi_enabled, true),
-     DEFINE_PROP_END_OF_LIST(),
- };
- 
--static void tpm_crb_reset(void *dev)
-+static void tpm_crb_none_reset(void *dev)
- {
-     CRBState *s = CRB(dev);
- 
--    if (s->ppi_enabled) {
--        tpm_ppi_reset(&s->ppi);
--    }
--    tpm_backend_reset(s->tpmbe);
--
--    memset(s->regs, 0, sizeof(s->regs));
--
--    ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
--                     tpmRegValidSts, 1);
--    ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
--                     tpmIdle, 1);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
--                     InterfaceType, CRB_INTF_TYPE_CRB_ACTIVE);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
--                     InterfaceVersion, CRB_INTF_VERSION_CRB);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
--                     CapLocality, CRB_INTF_CAP_LOCALITY_0_ONLY);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
--                     CapCRBIdleBypass, CRB_INTF_CAP_IDLE_FAST);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
--                     CapDataXferSizeSupport, CRB_INTF_CAP_XFER_SIZE_64);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
--                     CapFIFO, CRB_INTF_CAP_FIFO_NOT_SUPPORTED);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
--                     CapCRB, CRB_INTF_CAP_CRB_SUPPORTED);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
--                     InterfaceSelector, CRB_INTF_IF_SELECTOR_CRB);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
--                     RID, 0b0000);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID2,
--                     VID, PCI_VENDOR_ID_IBM);
--
--    s->regs[R_CRB_CTRL_CMD_SIZE] = CRB_CTRL_CMD_SIZE;
--    s->regs[R_CRB_CTRL_CMD_LADDR] = TPM_CRB_ADDR_BASE + A_CRB_DATA_BUFFER;
--    s->regs[R_CRB_CTRL_RSP_SIZE] = CRB_CTRL_CMD_SIZE;
--    s->regs[R_CRB_CTRL_RSP_ADDR] = TPM_CRB_ADDR_BASE + A_CRB_DATA_BUFFER;
--
--    s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->tpmbe),
--                            CRB_CTRL_CMD_SIZE);
--
--    if (tpm_backend_startup_tpm(s->tpmbe, s->be_buffer_size) < 0) {
--        exit(1);
--    }
-+    return tpm_crb_reset(&s->state, TPM_CRB_ADDR_BASE);
- }
- 
--static void tpm_crb_realize(DeviceState *dev, Error **errp)
-+static void tpm_crb_none_realize(DeviceState *dev, Error **errp)
- {
-     CRBState *s = CRB(dev);
- 
-@@ -289,64 +94,61 @@ static void tpm_crb_realize(DeviceState *dev, Error **errp)
-         error_setg(errp, "at most one TPM device is permitted");
-         return;
-     }
--    if (!s->tpmbe) {
-+    if (!s->state.tpmbe) {
-         error_setg(errp, "'tpmdev' property is required");
-         return;
-     }
- 
--    memory_region_init_io(&s->mmio, OBJECT(s), &tpm_crb_memory_ops, s,
--        "tpm-crb-mmio", sizeof(s->regs));
--    memory_region_init_ram(&s->cmdmem, OBJECT(s),
--        "tpm-crb-cmd", CRB_CTRL_CMD_SIZE, errp);
-+    tpm_crb_init_memory(OBJECT(s), &s->state, errp);
- 
-     memory_region_add_subregion(get_system_memory(),
--        TPM_CRB_ADDR_BASE, &s->mmio);
-+        TPM_CRB_ADDR_BASE, &s->state.mmio);
-     memory_region_add_subregion(get_system_memory(),
--        TPM_CRB_ADDR_BASE + sizeof(s->regs), &s->cmdmem);
-+        TPM_CRB_ADDR_BASE + sizeof(s->state.regs), &s->state.cmdmem);
- 
--    if (s->ppi_enabled) {
--        tpm_ppi_init(&s->ppi, get_system_memory(),
-+    if (s->state.ppi_enabled) {
-+        tpm_ppi_init(&s->state.ppi, get_system_memory(),
-                      TPM_PPI_ADDR_BASE, OBJECT(s));
-     }
- 
-     if (xen_enabled()) {
--        tpm_crb_reset(dev);
-+        tpm_crb_none_reset(dev);
-     } else {
--        qemu_register_reset(tpm_crb_reset, dev);
-+        qemu_register_reset(tpm_crb_none_reset, dev);
-     }
- }
- 
--static void tpm_crb_class_init(ObjectClass *klass, void *data)
-+static void tpm_crb_none_class_init(ObjectClass *klass, void *data)
- {
-     DeviceClass *dc = DEVICE_CLASS(klass);
-     TPMIfClass *tc = TPM_IF_CLASS(klass);
- 
--    dc->realize = tpm_crb_realize;
--    device_class_set_props(dc, tpm_crb_properties);
--    dc->vmsd  = &vmstate_tpm_crb;
-+    dc->realize = tpm_crb_none_realize;
-+    device_class_set_props(dc, tpm_crb_none_properties);
-+    dc->vmsd  = &vmstate_tpm_crb_none;
-     dc->user_creatable = true;
-     tc->model = TPM_MODEL_TPM_CRB;
--    tc->get_version = tpm_crb_get_version;
--    tc->request_completed = tpm_crb_request_completed;
-+    tc->get_version = tpm_crb_none_get_version;
-+    tc->request_completed = tpm_crb_none_request_completed;
- 
-     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
- }
- 
--static const TypeInfo tpm_crb_info = {
-+static const TypeInfo tpm_crb_none_info = {
-     .name = TYPE_TPM_CRB,
-     /* could be TYPE_SYS_BUS_DEVICE (or LPC etc) */
-     .parent = TYPE_DEVICE,
-     .instance_size = sizeof(CRBState),
--    .class_init  = tpm_crb_class_init,
-+    .class_init  = tpm_crb_none_class_init,
-     .interfaces = (InterfaceInfo[]) {
-         { TYPE_TPM_IF },
-         { }
-     }
- };
- 
--static void tpm_crb_register(void)
-+static void tpm_crb_none_register(void)
- {
--    type_register_static(&tpm_crb_info);
-+    type_register_static(&tpm_crb_none_info);
- }
- 
--type_init(tpm_crb_register)
-+type_init(tpm_crb_none_register)
-diff --git a/hw/tpm/tpm_crb.h b/hw/tpm/tpm_crb.h
-new file mode 100644
-index 0000000000..da3a0cf256
---- /dev/null
-+++ b/hw/tpm/tpm_crb.h
-@@ -0,0 +1,76 @@
-+/*
-+ * tpm_crb.h - QEMU's TPM CRB interface emulator
-+ *
-+ * Copyright (c) 2018 Red Hat, Inc.
-+ *
-+ * Authors:
-+ *   Marc-André Lureau <marcandre.lureau@redhat.com>
-+ *
-+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
-+ * See the COPYING file in the top-level directory.
-+ *
-+ * tpm_crb is a device for TPM 2.0 Command Response Buffer (CRB) Interface
-+ * as defined in TCG PC Client Platform TPM Profile (PTP) Specification
-+ * Family “2.0” Level 00 Revision 01.03 v22
-+ */
-+#ifndef TPM_TPM_CRB_H
-+#define TPM_TPM_CRB_H
-+
-+#include "exec/memory.h"
-+#include "hw/acpi/tpm.h"
-+#include "sysemu/tpm_backend.h"
-+#include "tpm_ppi.h"
-+
-+#define CRB_CTRL_CMD_SIZE (TPM_CRB_ADDR_SIZE - A_CRB_DATA_BUFFER)
-+
-+typedef struct TPMCRBState {
-+    TPMBackend *tpmbe;
-+    TPMBackendCmd cmd;
-+    uint32_t regs[TPM_CRB_R_MAX];
-+    MemoryRegion mmio;
-+    MemoryRegion cmdmem;
-+
-+    size_t be_buffer_size;
-+
-+    bool ppi_enabled;
-+    TPMPPI ppi;
-+} TPMCRBState;
-+
-+#define CRB_INTF_TYPE_CRB_ACTIVE 0b1
-+#define CRB_INTF_VERSION_CRB 0b1
-+#define CRB_INTF_CAP_LOCALITY_0_ONLY 0b0
-+#define CRB_INTF_CAP_IDLE_FAST 0b0
-+#define CRB_INTF_CAP_XFER_SIZE_64 0b11
-+#define CRB_INTF_CAP_FIFO_NOT_SUPPORTED 0b0
-+#define CRB_INTF_CAP_CRB_SUPPORTED 0b1
-+#define CRB_INTF_IF_SELECTOR_CRB 0b1
-+
-+enum crb_loc_ctrl {
-+    CRB_LOC_CTRL_REQUEST_ACCESS = BIT(0),
-+    CRB_LOC_CTRL_RELINQUISH = BIT(1),
-+    CRB_LOC_CTRL_SEIZE = BIT(2),
-+    CRB_LOC_CTRL_RESET_ESTABLISHMENT_BIT = BIT(3),
-+};
-+
-+enum crb_ctrl_req {
-+    CRB_CTRL_REQ_CMD_READY = BIT(0),
-+    CRB_CTRL_REQ_GO_IDLE = BIT(1),
-+};
-+
-+enum crb_start {
-+    CRB_START_INVOKE = BIT(0),
-+};
-+
-+enum crb_cancel {
-+    CRB_CANCEL_INVOKE = BIT(0),
-+};
-+
-+#define TPM_CRB_NO_LOCALITY 0xff
-+
-+void tpm_crb_request_completed(TPMCRBState *s, int ret);
-+enum TPMVersion tpm_crb_get_version(TPMCRBState *s);
-+int tpm_crb_pre_save(TPMCRBState *s);
-+void tpm_crb_reset(TPMCRBState *s, uint64_t baseaddr);
-+void tpm_crb_init_memory(Object *obj, TPMCRBState *s, Error **errp);
-+
-+#endif /* TPM_TPM_CRB_H */
-diff --git a/hw/tpm/tpm_crb_common.c b/hw/tpm/tpm_crb_common.c
-new file mode 100644
-index 0000000000..4c173affb6
---- /dev/null
-+++ b/hw/tpm/tpm_crb_common.c
-@@ -0,0 +1,218 @@
-+/*
-+ * tpm_crb_common.c - QEMU's TPM CRB interface emulator
-+ *
-+ * Copyright (c) 2018 Red Hat, Inc.
-+ *
-+ * Authors:
-+ *   Marc-André Lureau <marcandre.lureau@redhat.com>
-+ *
-+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
-+ * See the COPYING file in the top-level directory.
-+ *
-+ * tpm_crb is a device for TPM 2.0 Command Response Buffer (CRB) Interface
-+ * as defined in TCG PC Client Platform TPM Profile (PTP) Specification
-+ * Family “2.0” Level 00 Revision 01.03 v22
-+ */
-+
-+#include "qemu/osdep.h"
-+
-+#include "qemu/module.h"
-+#include "qapi/error.h"
-+#include "exec/address-spaces.h"
-+#include "hw/qdev-properties.h"
-+#include "hw/pci/pci_ids.h"
-+#include "hw/acpi/tpm.h"
-+#include "migration/vmstate.h"
-+#include "sysemu/tpm_backend.h"
-+#include "sysemu/tpm_util.h"
-+#include "sysemu/reset.h"
-+#include "sysemu/xen.h"
-+#include "tpm_prop.h"
-+#include "tpm_ppi.h"
-+#include "trace.h"
-+#include "qom/object.h"
-+#include "tpm_crb.h"
-+
-+static uint64_t tpm_crb_mmio_read(void *opaque, hwaddr addr,
-+                                  unsigned size)
-+{
-+    TPMCRBState *s = opaque;
-+    void *regs = (void *)&s->regs + (addr & ~3);
-+    unsigned offset = addr & 3;
-+    uint32_t val = *(uint32_t *)regs >> (8 * offset);
-+
-+    switch (addr) {
-+    case A_CRB_LOC_STATE:
-+        val |= !tpm_backend_get_tpm_established_flag(s->tpmbe);
-+        break;
-+    }
-+
-+    trace_tpm_crb_mmio_read(addr, size, val);
-+
-+    return val;
-+}
-+
-+static uint8_t tpm_crb_get_active_locty(TPMCRBState *s)
-+{
-+    if (!ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, locAssigned)) {
-+        return TPM_CRB_NO_LOCALITY;
-+    }
-+    return ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, activeLocality);
-+}
-+
-+static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
-+                               uint64_t val, unsigned size)
-+{
-+    TPMCRBState *s = opaque;
-+    uint8_t locty =  addr >> 12;
-+
-+    trace_tpm_crb_mmio_write(addr, size, val);
-+
-+    switch (addr) {
-+    case A_CRB_CTRL_REQ:
-+        switch (val) {
-+        case CRB_CTRL_REQ_CMD_READY:
-+            ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
-+                             tpmIdle, 0);
-+            break;
-+        case CRB_CTRL_REQ_GO_IDLE:
-+            ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
-+                             tpmIdle, 1);
-+            break;
-+        }
-+        break;
-+    case A_CRB_CTRL_CANCEL:
-+        if (val == CRB_CANCEL_INVOKE &&
-+            s->regs[R_CRB_CTRL_START] & CRB_START_INVOKE) {
-+            tpm_backend_cancel_cmd(s->tpmbe);
-+        }
-+        break;
-+    case A_CRB_CTRL_START:
-+        if (val == CRB_START_INVOKE &&
-+            !(s->regs[R_CRB_CTRL_START] & CRB_START_INVOKE) &&
-+            tpm_crb_get_active_locty(s) == locty) {
-+            void *mem = memory_region_get_ram_ptr(&s->cmdmem);
-+
-+            s->regs[R_CRB_CTRL_START] |= CRB_START_INVOKE;
-+            s->cmd = (TPMBackendCmd) {
-+                .in = mem,
-+                .in_len = MIN(tpm_cmd_get_size(mem), s->be_buffer_size),
-+                .out = mem,
-+                .out_len = s->be_buffer_size,
-+            };
-+
-+            tpm_backend_deliver_request(s->tpmbe, &s->cmd);
-+        }
-+        break;
-+    case A_CRB_LOC_CTRL:
-+        switch (val) {
-+        case CRB_LOC_CTRL_RESET_ESTABLISHMENT_BIT:
-+            /* not loc 3 or 4 */
-+            break;
-+        case CRB_LOC_CTRL_RELINQUISH:
-+            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
-+                             locAssigned, 0);
-+            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
-+                             Granted, 0);
-+            break;
-+        case CRB_LOC_CTRL_REQUEST_ACCESS:
-+            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
-+                             Granted, 1);
-+            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
-+                             beenSeized, 0);
-+            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
-+                             locAssigned, 1);
-+            break;
-+        }
-+        break;
-+    }
-+}
-+
-+const MemoryRegionOps tpm_crb_memory_ops = {
-+    .read = tpm_crb_mmio_read,
-+    .write = tpm_crb_mmio_write,
-+    .endianness = DEVICE_LITTLE_ENDIAN,
-+    .valid = {
-+        .min_access_size = 1,
-+        .max_access_size = 4,
-+    },
-+};
-+
-+void tpm_crb_request_completed(TPMCRBState *s, int ret)
-+{
-+    s->regs[R_CRB_CTRL_START] &= ~CRB_START_INVOKE;
-+    if (ret != 0) {
-+        ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
-+                         tpmSts, 1); /* fatal error */
-+    }
-+    memory_region_set_dirty(&s->cmdmem, 0, CRB_CTRL_CMD_SIZE);
-+}
-+
-+enum TPMVersion tpm_crb_get_version(TPMCRBState *s)
-+{
-+    return tpm_backend_get_tpm_version(s->tpmbe);
-+}
-+
-+int tpm_crb_pre_save(TPMCRBState *s)
-+{
-+    tpm_backend_finish_sync(s->tpmbe);
-+
-+    return 0;
-+}
-+
-+void tpm_crb_reset(TPMCRBState *s, uint64_t baseaddr)
-+{
-+    if (s->ppi_enabled) {
-+        tpm_ppi_reset(&s->ppi);
-+    }
-+    tpm_backend_reset(s->tpmbe);
-+
-+    memset(s->regs, 0, sizeof(s->regs));
-+
-+    ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
-+                     tpmRegValidSts, 1);
-+    ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
-+                     tpmIdle, 1);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+                     InterfaceType, CRB_INTF_TYPE_CRB_ACTIVE);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+                     InterfaceVersion, CRB_INTF_VERSION_CRB);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+                     CapLocality, CRB_INTF_CAP_LOCALITY_0_ONLY);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+                     CapCRBIdleBypass, CRB_INTF_CAP_IDLE_FAST);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+                     CapDataXferSizeSupport, CRB_INTF_CAP_XFER_SIZE_64);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+                     CapFIFO, CRB_INTF_CAP_FIFO_NOT_SUPPORTED);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+                     CapCRB, CRB_INTF_CAP_CRB_SUPPORTED);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+                     InterfaceSelector, CRB_INTF_IF_SELECTOR_CRB);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+                     RID, 0b0000);
-+    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID2,
-+                     VID, PCI_VENDOR_ID_IBM);
-+
-+    baseaddr += A_CRB_DATA_BUFFER;
-+    s->regs[R_CRB_CTRL_CMD_SIZE] = CRB_CTRL_CMD_SIZE;
-+    s->regs[R_CRB_CTRL_CMD_LADDR] = (uint32_t)baseaddr;
-+    s->regs[R_CRB_CTRL_CMD_HADDR] = (uint32_t)(baseaddr >> 32);
-+    s->regs[R_CRB_CTRL_RSP_SIZE] = CRB_CTRL_CMD_SIZE;
-+    s->regs[R_CRB_CTRL_RSP_ADDR] = (uint32_t)baseaddr;
-+
-+    s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->tpmbe),
-+                            CRB_CTRL_CMD_SIZE);
-+
-+    if (tpm_backend_startup_tpm(s->tpmbe, s->be_buffer_size) < 0) {
-+        exit(1);
-+    }
-+}
-+
-+void tpm_crb_init_memory(Object *obj, TPMCRBState *s, Error **errp)
-+{
-+    memory_region_init_io(&s->mmio, obj, &tpm_crb_memory_ops, s,
-+        "tpm-crb-mmio", sizeof(s->regs));
-+    memory_region_init_ram(&s->cmdmem, obj,
-+        "tpm-crb-cmd", CRB_CTRL_CMD_SIZE, errp);
-+}
-diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events
-index f17110458e..bfc1f78b0f 100644
---- a/hw/tpm/trace-events
-+++ b/hw/tpm/trace-events
-@@ -1,6 +1,6 @@
- # See docs/devel/tracing.rst for syntax documentation.
- 
--# tpm_crb.c
-+# tpm_crb_common.c
- tpm_crb_mmio_read(uint64_t addr, unsigned size, uint32_t val) "CRB read 0x%016" PRIx64 " len:%u val: 0x%" PRIx32
- tpm_crb_mmio_write(uint64_t addr, unsigned size, uint32_t val) "CRB write 0x%016" PRIx64 " len:%u val: 0x%" PRIx32
- 
--- 
-2.39.2 (Apple Git-143)
-
-From 5b13dacc447a4bf799f1d990c687fb5e4eda7470 Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Mon, 10 Jul 2023 23:08:20 -0700
-Subject: [PATCH 02/12] tpm_crb: CTRL_RSP_ADDR is 64-bits wide
-
-The register is actually 64 bits wide, but to make this clearer than
-the specification does, we define two 32-bit registers,
-CTRL_RSP_LADDR and CTRL_RSP_HADDR, to match the CTRL_CMD_* naming.
-This deviates from the specification but is much clearer.
-
-Previously, the only CRB device used a fixed system address, so this
-was not an issue. However, once we support a SysBus CRB device, the
-address can be anywhere in the 64-bit space.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- hw/tpm/tpm_crb_common.c    | 3 ++-
- include/hw/acpi/tpm.h      | 3 ++-
- tests/qtest/tpm-crb-test.c | 2 +-
- tests/qtest/tpm-util.c     | 2 +-
- 4 files changed, 6 insertions(+), 4 deletions(-)
-
-diff --git a/hw/tpm/tpm_crb_common.c b/hw/tpm/tpm_crb_common.c
-index 4c173affb6..228e2d0faf 100644
---- a/hw/tpm/tpm_crb_common.c
-+++ b/hw/tpm/tpm_crb_common.c
-@@ -199,7 +199,8 @@ void tpm_crb_reset(TPMCRBState *s, uint64_t baseaddr)
-     s->regs[R_CRB_CTRL_CMD_LADDR] = (uint32_t)baseaddr;
-     s->regs[R_CRB_CTRL_CMD_HADDR] = (uint32_t)(baseaddr >> 32);
-     s->regs[R_CRB_CTRL_RSP_SIZE] = CRB_CTRL_CMD_SIZE;
--    s->regs[R_CRB_CTRL_RSP_ADDR] = (uint32_t)baseaddr;
-+    s->regs[R_CRB_CTRL_RSP_LADDR] = (uint32_t)baseaddr;
-+    s->regs[R_CRB_CTRL_RSP_HADDR] = (uint32_t)(baseaddr >> 32);
- 
-     s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->tpmbe),
-                             CRB_CTRL_CMD_SIZE);
-diff --git a/include/hw/acpi/tpm.h b/include/hw/acpi/tpm.h
-index 559ba6906c..f8a8d50bd6 100644
---- a/include/hw/acpi/tpm.h
-+++ b/include/hw/acpi/tpm.h
-@@ -173,7 +173,8 @@ REG32(CRB_CTRL_CMD_SIZE, 0x58)
- REG32(CRB_CTRL_CMD_LADDR, 0x5C)
- REG32(CRB_CTRL_CMD_HADDR, 0x60)
- REG32(CRB_CTRL_RSP_SIZE, 0x64)
--REG32(CRB_CTRL_RSP_ADDR, 0x68)
-+REG32(CRB_CTRL_RSP_LADDR, 0x68)
-+REG32(CRB_CTRL_RSP_HADDR, 0x6C)
- REG32(CRB_DATA_BUFFER, 0x80)
- 
- #define TPM_CRB_ADDR_BASE           0xFED40000
-diff --git a/tests/qtest/tpm-crb-test.c b/tests/qtest/tpm-crb-test.c
-index 7b94453390..ae6cffcedc 100644
---- a/tests/qtest/tpm-crb-test.c
-+++ b/tests/qtest/tpm-crb-test.c
-@@ -31,7 +31,7 @@ static void tpm_crb_test(const void *data)
-     uint32_t csize = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_CMD_SIZE);
-     uint64_t caddr = readq(TPM_CRB_ADDR_BASE + A_CRB_CTRL_CMD_LADDR);
-     uint32_t rsize = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_SIZE);
--    uint64_t raddr = readq(TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_ADDR);
-+    uint64_t raddr = readq(TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_LADDR);
-     uint8_t locstate = readb(TPM_CRB_ADDR_BASE + A_CRB_LOC_STATE);
-     uint32_t locctrl = readl(TPM_CRB_ADDR_BASE + A_CRB_LOC_CTRL);
-     uint32_t locsts = readl(TPM_CRB_ADDR_BASE + A_CRB_LOC_STS);
-diff --git a/tests/qtest/tpm-util.c b/tests/qtest/tpm-util.c
-index a7efe2d0d2..e47c733dd1 100644
---- a/tests/qtest/tpm-util.c
-+++ b/tests/qtest/tpm-util.c
-@@ -25,7 +25,7 @@ void tpm_util_crb_transfer(QTestState *s,
-                            unsigned char *rsp, size_t rsp_size)
- {
-     uint64_t caddr = qtest_readq(s, TPM_CRB_ADDR_BASE + A_CRB_CTRL_CMD_LADDR);
--    uint64_t raddr = qtest_readq(s, TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_ADDR);
-+    uint64_t raddr = qtest_readq(s, TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_LADDR);
- 
-     qtest_writeb(s, TPM_CRB_ADDR_BASE + A_CRB_LOC_CTRL, 1);
- 
--- 
-2.39.2 (Apple Git-143)
-
-From bfdeab6212556ed927395919a9b3a5b74f1621cc Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Tue, 11 Jul 2023 21:18:06 -0700
-Subject: [PATCH 03/12] tpm_ppi: refactor memory space initialization
-
-Instead of calling `memory_region_add_subregion` directly, we defer to
-the caller to do it. This allows us to re-use the code for a SysBus
-device.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- hw/tpm/tpm_crb.c        |  4 ++--
- hw/tpm/tpm_crb_common.c |  3 +++
- hw/tpm/tpm_ppi.c        |  5 +----
- hw/tpm/tpm_ppi.h        | 10 +++-------
- hw/tpm/tpm_tis_isa.c    |  5 +++--
- 5 files changed, 12 insertions(+), 15 deletions(-)
-
-diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
-index 3ef4977fb5..598c3e0161 100644
---- a/hw/tpm/tpm_crb.c
-+++ b/hw/tpm/tpm_crb.c
-@@ -107,8 +107,8 @@ static void tpm_crb_none_realize(DeviceState *dev, Error **errp)
-         TPM_CRB_ADDR_BASE + sizeof(s->state.regs), &s->state.cmdmem);
- 
-     if (s->state.ppi_enabled) {
--        tpm_ppi_init(&s->state.ppi, get_system_memory(),
--                     TPM_PPI_ADDR_BASE, OBJECT(s));
-+        memory_region_add_subregion(get_system_memory(),
-+            TPM_PPI_ADDR_BASE, &s->state.ppi.ram);
-     }
- 
-     if (xen_enabled()) {
-diff --git a/hw/tpm/tpm_crb_common.c b/hw/tpm/tpm_crb_common.c
-index 228e2d0faf..e56e910670 100644
---- a/hw/tpm/tpm_crb_common.c
-+++ b/hw/tpm/tpm_crb_common.c
-@@ -216,4 +216,7 @@ void tpm_crb_init_memory(Object *obj, TPMCRBState *s, Error **errp)
-         "tpm-crb-mmio", sizeof(s->regs));
-     memory_region_init_ram(&s->cmdmem, obj,
-         "tpm-crb-cmd", CRB_CTRL_CMD_SIZE, errp);
-+    if (s->ppi_enabled) {
-+        tpm_ppi_init_memory(&s->ppi, obj);
-+    }
- }
-diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c
-index 7f74e26ec6..40cab59afa 100644
---- a/hw/tpm/tpm_ppi.c
-+++ b/hw/tpm/tpm_ppi.c
-@@ -44,14 +44,11 @@ void tpm_ppi_reset(TPMPPI *tpmppi)
-     }
- }
- 
--void tpm_ppi_init(TPMPPI *tpmppi, MemoryRegion *m,
--                  hwaddr addr, Object *obj)
-+void tpm_ppi_init_memory(TPMPPI *tpmppi, Object *obj)
- {
-     tpmppi->buf = qemu_memalign(qemu_real_host_page_size(),
-                                 HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE));
-     memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi",
-                                       TPM_PPI_ADDR_SIZE, tpmppi->buf);
-     vmstate_register_ram(&tpmppi->ram, DEVICE(obj));
--
--    memory_region_add_subregion(m, addr, &tpmppi->ram);
- }
-diff --git a/hw/tpm/tpm_ppi.h b/hw/tpm/tpm_ppi.h
-index bf5d4a300f..30863c6438 100644
---- a/hw/tpm/tpm_ppi.h
-+++ b/hw/tpm/tpm_ppi.h
-@@ -20,17 +20,13 @@ typedef struct TPMPPI {
- } TPMPPI;
- 
- /**
-- * tpm_ppi_init:
-+ * tpm_ppi_init_memory:
-  * @tpmppi: a TPMPPI
-- * @m: the address-space / MemoryRegion to use
-- * @addr: the address of the PPI region
-  * @obj: the owner object
-  *
-- * Register the TPM PPI memory region at @addr on the given address
-- * space for the object @obj.
-+ * Creates the TPM PPI memory region.
-  **/
--void tpm_ppi_init(TPMPPI *tpmppi, MemoryRegion *m,
--                  hwaddr addr, Object *obj);
-+void tpm_ppi_init_memory(TPMPPI *tpmppi, Object *obj);
- 
- /**
-  * tpm_ppi_reset:
-diff --git a/hw/tpm/tpm_tis_isa.c b/hw/tpm/tpm_tis_isa.c
-index 91e3792248..7cd7415f30 100644
---- a/hw/tpm/tpm_tis_isa.c
-+++ b/hw/tpm/tpm_tis_isa.c
-@@ -134,8 +134,9 @@ static void tpm_tis_isa_realizefn(DeviceState *dev, Error **errp)
-                                 TPM_TIS_ADDR_BASE, &s->mmio);
- 
-     if (s->ppi_enabled) {
--        tpm_ppi_init(&s->ppi, isa_address_space(ISA_DEVICE(dev)),
--                     TPM_PPI_ADDR_BASE, OBJECT(dev));
-+        tpm_ppi_init_memory(&s->ppi, OBJECT(dev));
-+        memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)),
-+                                    TPM_PPI_ADDR_BASE, &s->ppi.ram);
-     }
- }
- 
--- 
-2.39.2 (Apple Git-143)
-
-From 6009eeba5384112b151ed73e245c85dc8222ef3f Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Tue, 11 Jul 2023 22:37:34 -0700
-Subject: [PATCH 04/12] tpm_crb: use a single read-as-mem/write-as-mmio mapping
-
-On Apple Silicon, when Windows performs a LDP on the CRB MMIO space,
-the exception is not decoded by hardware and we cannot trap the MMIO
-read. This led to the idea from @agraf to use the same mapping type as
-ROM devices: namely that reads should be seen as memory type and
-writes should trap as MMIO.
-
-Once that was done, the second memory mapping of the command buffer
-region was redundant and was removed.
-
-A note about the removal of the read trap for `CRB_LOC_STATE`:
-The only usage was to return the most up-to-date value for
-`tpmEstablished`. However, `tpmEstablished` is only set when a
-TPM2_HashStart operation is called which only exists for locality 4.
-Indeed, the comment for the write handler of `CRB_LOC_CTRL` makes the
-same argument for why it is not calling the backend to reset the
-`tpmEstablished` bit. As this bit is unused, we do not need to worry
-about updating it for reads.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- hw/tpm/tpm_crb.c        |   3 -
- hw/tpm/tpm_crb.h        |   2 -
- hw/tpm/tpm_crb_common.c | 126 +++++++++++++++++++++-------------------
- 3 files changed, 65 insertions(+), 66 deletions(-)
-
-diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
-index 598c3e0161..07c6868d8d 100644
---- a/hw/tpm/tpm_crb.c
-+++ b/hw/tpm/tpm_crb.c
-@@ -68,7 +68,6 @@ static const VMStateDescription vmstate_tpm_crb_none = {
-     .name = "tpm-crb",
-     .pre_save = tpm_crb_none_pre_save,
-     .fields = (VMStateField[]) {
--        VMSTATE_UINT32_ARRAY(state.regs, CRBState, TPM_CRB_R_MAX),
-         VMSTATE_END_OF_LIST(),
-     }
- };
-@@ -103,8 +102,6 @@ static void tpm_crb_none_realize(DeviceState *dev, Error **errp)
- 
-     memory_region_add_subregion(get_system_memory(),
-         TPM_CRB_ADDR_BASE, &s->state.mmio);
--    memory_region_add_subregion(get_system_memory(),
--        TPM_CRB_ADDR_BASE + sizeof(s->state.regs), &s->state.cmdmem);
- 
-     if (s->state.ppi_enabled) {
-         memory_region_add_subregion(get_system_memory(),
-diff --git a/hw/tpm/tpm_crb.h b/hw/tpm/tpm_crb.h
-index da3a0cf256..7cdd37335f 100644
---- a/hw/tpm/tpm_crb.h
-+++ b/hw/tpm/tpm_crb.h
-@@ -26,9 +26,7 @@
- typedef struct TPMCRBState {
-     TPMBackend *tpmbe;
-     TPMBackendCmd cmd;
--    uint32_t regs[TPM_CRB_R_MAX];
-     MemoryRegion mmio;
--    MemoryRegion cmdmem;
- 
-     size_t be_buffer_size;
- 
-diff --git a/hw/tpm/tpm_crb_common.c b/hw/tpm/tpm_crb_common.c
-index e56e910670..772ddd9bb1 100644
---- a/hw/tpm/tpm_crb_common.c
-+++ b/hw/tpm/tpm_crb_common.c
-@@ -33,31 +33,12 @@
- #include "qom/object.h"
- #include "tpm_crb.h"
- 
--static uint64_t tpm_crb_mmio_read(void *opaque, hwaddr addr,
--                                  unsigned size)
-+static uint8_t tpm_crb_get_active_locty(TPMCRBState *s, uint32_t *regs)
- {
--    TPMCRBState *s = opaque;
--    void *regs = (void *)&s->regs + (addr & ~3);
--    unsigned offset = addr & 3;
--    uint32_t val = *(uint32_t *)regs >> (8 * offset);
--
--    switch (addr) {
--    case A_CRB_LOC_STATE:
--        val |= !tpm_backend_get_tpm_established_flag(s->tpmbe);
--        break;
--    }
--
--    trace_tpm_crb_mmio_read(addr, size, val);
--
--    return val;
--}
--
--static uint8_t tpm_crb_get_active_locty(TPMCRBState *s)
--{
--    if (!ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, locAssigned)) {
-+    if (!ARRAY_FIELD_EX32(regs, CRB_LOC_STATE, locAssigned)) {
-         return TPM_CRB_NO_LOCALITY;
-     }
--    return ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, activeLocality);
-+    return ARRAY_FIELD_EX32(regs, CRB_LOC_STATE, activeLocality);
- }
- 
- static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
-@@ -65,35 +46,47 @@ static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
- {
-     TPMCRBState *s = opaque;
-     uint8_t locty =  addr >> 12;
-+    uint32_t *regs;
-+    void *mem;
- 
-     trace_tpm_crb_mmio_write(addr, size, val);
-+    regs = memory_region_get_ram_ptr(&s->mmio);
-+    mem = &regs[R_CRB_DATA_BUFFER];
-+    assert(regs);
-+
-+    if (addr >= A_CRB_DATA_BUFFER) {
-+        assert(addr + size <= TPM_CRB_ADDR_SIZE);
-+        assert(size <= sizeof(val));
-+        memcpy(mem + addr - A_CRB_DATA_BUFFER, &val, size);
-+        memory_region_set_dirty(&s->mmio, addr, size);
-+        return;
-+    }
- 
-     switch (addr) {
-     case A_CRB_CTRL_REQ:
-         switch (val) {
-         case CRB_CTRL_REQ_CMD_READY:
--            ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
-+            ARRAY_FIELD_DP32(regs, CRB_CTRL_STS,
-                              tpmIdle, 0);
-             break;
-         case CRB_CTRL_REQ_GO_IDLE:
--            ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
-+            ARRAY_FIELD_DP32(regs, CRB_CTRL_STS,
-                              tpmIdle, 1);
-             break;
-         }
-         break;
-     case A_CRB_CTRL_CANCEL:
-         if (val == CRB_CANCEL_INVOKE &&
--            s->regs[R_CRB_CTRL_START] & CRB_START_INVOKE) {
-+            regs[R_CRB_CTRL_START] & CRB_START_INVOKE) {
-             tpm_backend_cancel_cmd(s->tpmbe);
-         }
-         break;
-     case A_CRB_CTRL_START:
-         if (val == CRB_START_INVOKE &&
--            !(s->regs[R_CRB_CTRL_START] & CRB_START_INVOKE) &&
--            tpm_crb_get_active_locty(s) == locty) {
--            void *mem = memory_region_get_ram_ptr(&s->cmdmem);
-+            !(regs[R_CRB_CTRL_START] & CRB_START_INVOKE) &&
-+            tpm_crb_get_active_locty(s, regs) == locty) {
- 
--            s->regs[R_CRB_CTRL_START] |= CRB_START_INVOKE;
-+            regs[R_CRB_CTRL_START] |= CRB_START_INVOKE;
-             s->cmd = (TPMBackendCmd) {
-                 .in = mem,
-                 .in_len = MIN(tpm_cmd_get_size(mem), s->be_buffer_size),
-@@ -110,26 +103,27 @@ static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
-             /* not loc 3 or 4 */
-             break;
-         case CRB_LOC_CTRL_RELINQUISH:
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
-+            ARRAY_FIELD_DP32(regs, CRB_LOC_STATE,
-                              locAssigned, 0);
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
-+            ARRAY_FIELD_DP32(regs, CRB_LOC_STS,
-                              Granted, 0);
-             break;
-         case CRB_LOC_CTRL_REQUEST_ACCESS:
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
-+            ARRAY_FIELD_DP32(regs, CRB_LOC_STS,
-                              Granted, 1);
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STS,
-+            ARRAY_FIELD_DP32(regs, CRB_LOC_STS,
-                              beenSeized, 0);
--            ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
-+            ARRAY_FIELD_DP32(regs, CRB_LOC_STATE,
-                              locAssigned, 1);
-             break;
-         }
-         break;
-     }
-+
-+    memory_region_set_dirty(&s->mmio, 0, A_CRB_DATA_BUFFER);
- }
- 
- const MemoryRegionOps tpm_crb_memory_ops = {
--    .read = tpm_crb_mmio_read,
-     .write = tpm_crb_mmio_write,
-     .endianness = DEVICE_LITTLE_ENDIAN,
-     .valid = {
-@@ -140,12 +134,16 @@ const MemoryRegionOps tpm_crb_memory_ops = {
- 
- void tpm_crb_request_completed(TPMCRBState *s, int ret)
- {
--    s->regs[R_CRB_CTRL_START] &= ~CRB_START_INVOKE;
-+    uint32_t *regs = memory_region_get_ram_ptr(&s->mmio);
-+
-+    assert(regs);
-+    regs[R_CRB_CTRL_START] &= ~CRB_START_INVOKE;
-     if (ret != 0) {
--        ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
-+        ARRAY_FIELD_DP32(regs, CRB_CTRL_STS,
-                          tpmSts, 1); /* fatal error */
-     }
--    memory_region_set_dirty(&s->cmdmem, 0, CRB_CTRL_CMD_SIZE);
-+
-+    memory_region_set_dirty(&s->mmio, 0, TPM_CRB_ADDR_SIZE);
- }
- 
- enum TPMVersion tpm_crb_get_version(TPMCRBState *s)
-@@ -162,45 +160,50 @@ int tpm_crb_pre_save(TPMCRBState *s)
- 
- void tpm_crb_reset(TPMCRBState *s, uint64_t baseaddr)
- {
-+    uint32_t *regs = memory_region_get_ram_ptr(&s->mmio);
-+
-+    assert(regs);
-     if (s->ppi_enabled) {
-         tpm_ppi_reset(&s->ppi);
-     }
-     tpm_backend_reset(s->tpmbe);
- 
--    memset(s->regs, 0, sizeof(s->regs));
-+    memset(regs, 0, TPM_CRB_ADDR_SIZE);
- 
--    ARRAY_FIELD_DP32(s->regs, CRB_LOC_STATE,
-+    ARRAY_FIELD_DP32(regs, CRB_LOC_STATE,
-                      tpmRegValidSts, 1);
--    ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
-+    ARRAY_FIELD_DP32(regs, CRB_LOC_STATE,
-+                     tpmEstablished, 1);
-+    ARRAY_FIELD_DP32(regs, CRB_CTRL_STS,
-                      tpmIdle, 1);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID,
-                      InterfaceType, CRB_INTF_TYPE_CRB_ACTIVE);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID,
-                      InterfaceVersion, CRB_INTF_VERSION_CRB);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID,
-                      CapLocality, CRB_INTF_CAP_LOCALITY_0_ONLY);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID,
-                      CapCRBIdleBypass, CRB_INTF_CAP_IDLE_FAST);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID,
-                      CapDataXferSizeSupport, CRB_INTF_CAP_XFER_SIZE_64);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID,
-                      CapFIFO, CRB_INTF_CAP_FIFO_NOT_SUPPORTED);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID,
-                      CapCRB, CRB_INTF_CAP_CRB_SUPPORTED);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID,
-                      InterfaceSelector, CRB_INTF_IF_SELECTOR_CRB);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID,
-                      RID, 0b0000);
--    ARRAY_FIELD_DP32(s->regs, CRB_INTF_ID2,
-+    ARRAY_FIELD_DP32(regs, CRB_INTF_ID2,
-                      VID, PCI_VENDOR_ID_IBM);
- 
-     baseaddr += A_CRB_DATA_BUFFER;
--    s->regs[R_CRB_CTRL_CMD_SIZE] = CRB_CTRL_CMD_SIZE;
--    s->regs[R_CRB_CTRL_CMD_LADDR] = (uint32_t)baseaddr;
--    s->regs[R_CRB_CTRL_CMD_HADDR] = (uint32_t)(baseaddr >> 32);
--    s->regs[R_CRB_CTRL_RSP_SIZE] = CRB_CTRL_CMD_SIZE;
--    s->regs[R_CRB_CTRL_RSP_LADDR] = (uint32_t)baseaddr;
--    s->regs[R_CRB_CTRL_RSP_HADDR] = (uint32_t)(baseaddr >> 32);
-+    regs[R_CRB_CTRL_CMD_SIZE] = CRB_CTRL_CMD_SIZE;
-+    regs[R_CRB_CTRL_CMD_LADDR] = (uint32_t)baseaddr;
-+    regs[R_CRB_CTRL_CMD_HADDR] = (uint32_t)(baseaddr >> 32);
-+    regs[R_CRB_CTRL_RSP_SIZE] = CRB_CTRL_CMD_SIZE;
-+    regs[R_CRB_CTRL_RSP_LADDR] = (uint32_t)baseaddr;
-+    regs[R_CRB_CTRL_RSP_HADDR] = (uint32_t)(baseaddr >> 32);
- 
-     s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->tpmbe),
-                             CRB_CTRL_CMD_SIZE);
-@@ -208,14 +211,15 @@ void tpm_crb_reset(TPMCRBState *s, uint64_t baseaddr)
-     if (tpm_backend_startup_tpm(s->tpmbe, s->be_buffer_size) < 0) {
-         exit(1);
-     }
-+
-+    memory_region_rom_device_set_romd(&s->mmio, true);
-+    memory_region_set_dirty(&s->mmio, 0, TPM_CRB_ADDR_SIZE);
- }
- 
- void tpm_crb_init_memory(Object *obj, TPMCRBState *s, Error **errp)
- {
--    memory_region_init_io(&s->mmio, obj, &tpm_crb_memory_ops, s,
--        "tpm-crb-mmio", sizeof(s->regs));
--    memory_region_init_ram(&s->cmdmem, obj,
--        "tpm-crb-cmd", CRB_CTRL_CMD_SIZE, errp);
-+    memory_region_init_rom_device(&s->mmio, obj, &tpm_crb_memory_ops, s,
-+        "tpm-crb-mmio", TPM_CRB_ADDR_SIZE, errp);
-     if (s->ppi_enabled) {
-         tpm_ppi_init_memory(&s->ppi, obj);
-     }
--- 
-2.39.2 (Apple Git-143)
-
-From a76d3a9f0586cc9ca1156beff70108461a78936c Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Tue, 11 Jul 2023 23:06:38 -0700
-Subject: [PATCH 05/12] tpm_crb: use the ISA bus
-
-Since this device is gated to only build for targets with the PC
-configuration, we should use the ISA bus like with TPM TIS.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- hw/tpm/Kconfig   |  2 +-
- hw/tpm/tpm_crb.c | 52 ++++++++++++++++++++++++------------------------
- 2 files changed, 27 insertions(+), 27 deletions(-)
-
-diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig
-index 29e82f3c92..d75319e8e5 100644
---- a/hw/tpm/Kconfig
-+++ b/hw/tpm/Kconfig
-@@ -15,7 +15,7 @@ config TPM_TIS
- 
- config TPM_CRB
-     bool
--    depends on TPM && PC
-+    depends on TPM && ISA_BUS
-     select TPM_BACKEND
- 
- config TPM_SPAPR
-diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
-index 07c6868d8d..6144081d30 100644
---- a/hw/tpm/tpm_crb.c
-+++ b/hw/tpm/tpm_crb.c
-@@ -22,6 +22,7 @@
- #include "hw/qdev-properties.h"
- #include "hw/pci/pci_ids.h"
- #include "hw/acpi/tpm.h"
-+#include "hw/isa/isa.h"
- #include "migration/vmstate.h"
- #include "sysemu/tpm_backend.h"
- #include "sysemu/tpm_util.h"
-@@ -34,7 +35,7 @@
- #include "tpm_crb.h"
- 
- struct CRBState {
--    DeviceState parent_obj;
-+    ISADevice parent_obj;
- 
-     TPMCRBState state;
- };
-@@ -43,49 +44,49 @@ typedef struct CRBState CRBState;
- DECLARE_INSTANCE_CHECKER(CRBState, CRB,
-                          TYPE_TPM_CRB)
- 
--static void tpm_crb_none_request_completed(TPMIf *ti, int ret)
-+static void tpm_crb_isa_request_completed(TPMIf *ti, int ret)
- {
-     CRBState *s = CRB(ti);
- 
-     tpm_crb_request_completed(&s->state, ret);
- }
- 
--static enum TPMVersion tpm_crb_none_get_version(TPMIf *ti)
-+static enum TPMVersion tpm_crb_isa_get_version(TPMIf *ti)
- {
-     CRBState *s = CRB(ti);
- 
-     return tpm_crb_get_version(&s->state);
- }
- 
--static int tpm_crb_none_pre_save(void *opaque)
-+static int tpm_crb_isa_pre_save(void *opaque)
- {
-     CRBState *s = opaque;
- 
-     return tpm_crb_pre_save(&s->state);
- }
- 
--static const VMStateDescription vmstate_tpm_crb_none = {
-+static const VMStateDescription vmstate_tpm_crb_isa = {
-     .name = "tpm-crb",
--    .pre_save = tpm_crb_none_pre_save,
-+    .pre_save = tpm_crb_isa_pre_save,
-     .fields = (VMStateField[]) {
-         VMSTATE_END_OF_LIST(),
-     }
- };
- 
--static Property tpm_crb_none_properties[] = {
-+static Property tpm_crb_isa_properties[] = {
-     DEFINE_PROP_TPMBE("tpmdev", CRBState, state.tpmbe),
-     DEFINE_PROP_BOOL("ppi", CRBState, state.ppi_enabled, true),
-     DEFINE_PROP_END_OF_LIST(),
- };
- 
--static void tpm_crb_none_reset(void *dev)
-+static void tpm_crb_isa_reset(void *dev)
- {
-     CRBState *s = CRB(dev);
- 
-     return tpm_crb_reset(&s->state, TPM_CRB_ADDR_BASE);
- }
- 
--static void tpm_crb_none_realize(DeviceState *dev, Error **errp)
-+static void tpm_crb_isa_realize(DeviceState *dev, Error **errp)
- {
-     CRBState *s = CRB(dev);
- 
-@@ -100,52 +101,51 @@ static void tpm_crb_none_realize(DeviceState *dev, Error **errp)
- 
-     tpm_crb_init_memory(OBJECT(s), &s->state, errp);
- 
--    memory_region_add_subregion(get_system_memory(),
-+    memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)),
-         TPM_CRB_ADDR_BASE, &s->state.mmio);
- 
-     if (s->state.ppi_enabled) {
--        memory_region_add_subregion(get_system_memory(),
-+        memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)),
-             TPM_PPI_ADDR_BASE, &s->state.ppi.ram);
-     }
- 
-     if (xen_enabled()) {
--        tpm_crb_none_reset(dev);
-+        tpm_crb_isa_reset(dev);
-     } else {
--        qemu_register_reset(tpm_crb_none_reset, dev);
-+        qemu_register_reset(tpm_crb_isa_reset, dev);
-     }
- }
- 
--static void tpm_crb_none_class_init(ObjectClass *klass, void *data)
-+static void tpm_crb_isa_class_init(ObjectClass *klass, void *data)
- {
-     DeviceClass *dc = DEVICE_CLASS(klass);
-     TPMIfClass *tc = TPM_IF_CLASS(klass);
- 
--    dc->realize = tpm_crb_none_realize;
--    device_class_set_props(dc, tpm_crb_none_properties);
--    dc->vmsd  = &vmstate_tpm_crb_none;
-+    dc->realize = tpm_crb_isa_realize;
-+    device_class_set_props(dc, tpm_crb_isa_properties);
-+    dc->vmsd  = &vmstate_tpm_crb_isa;
-     dc->user_creatable = true;
-     tc->model = TPM_MODEL_TPM_CRB;
--    tc->get_version = tpm_crb_none_get_version;
--    tc->request_completed = tpm_crb_none_request_completed;
-+    tc->get_version = tpm_crb_isa_get_version;
-+    tc->request_completed = tpm_crb_isa_request_completed;
- 
-     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
- }
- 
--static const TypeInfo tpm_crb_none_info = {
-+static const TypeInfo tpm_crb_isa_info = {
-     .name = TYPE_TPM_CRB,
--    /* could be TYPE_SYS_BUS_DEVICE (or LPC etc) */
--    .parent = TYPE_DEVICE,
-+    .parent = TYPE_ISA_DEVICE,
-     .instance_size = sizeof(CRBState),
--    .class_init  = tpm_crb_none_class_init,
-+    .class_init  = tpm_crb_isa_class_init,
-     .interfaces = (InterfaceInfo[]) {
-         { TYPE_TPM_IF },
-         { }
-     }
- };
- 
--static void tpm_crb_none_register(void)
-+static void tpm_crb_isa_register(void)
- {
--    type_register_static(&tpm_crb_none_info);
-+    type_register_static(&tpm_crb_isa_info);
- }
- 
--type_init(tpm_crb_none_register)
-+type_init(tpm_crb_isa_register)
--- 
-2.39.2 (Apple Git-143)
-
-From e6a6e7ec15da74e20d6103415b36d7c5455d1e4f Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Tue, 11 Jul 2023 23:20:55 -0700
-Subject: [PATCH 06/12] tpm_crb: move ACPI table building to device interface
-
-This logic is similar to TPM TIS ISA device.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- hw/i386/acpi-build.c | 23 -----------------------
- hw/tpm/tpm_crb.c     | 28 ++++++++++++++++++++++++++++
- 2 files changed, 28 insertions(+), 23 deletions(-)
-
-diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
-index ec857a117e..5d26a4a021 100644
---- a/hw/i386/acpi-build.c
-+++ b/hw/i386/acpi-build.c
-@@ -1441,9 +1441,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
-     uint32_t nr_mem = machine->ram_slots;
-     int root_bus_limit = 0xFF;
-     PCIBus *bus = NULL;
--#ifdef CONFIG_TPM
--    TPMIf *tpm = tpm_find();
--#endif
-     bool cxl_present = false;
-     int i;
-     VMBusBridge *vmbus_bridge = vmbus_bridge_find();
-@@ -1793,26 +1790,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
-         }
-     }
- 
--#ifdef CONFIG_TPM
--    if (TPM_IS_CRB(tpm)) {
--        dev = aml_device("TPM");
--        aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
--        aml_append(dev, aml_name_decl("_STR",
--                                      aml_string("TPM 2.0 Device")));
--        crs = aml_resource_template();
--        aml_append(crs, aml_memory32_fixed(TPM_CRB_ADDR_BASE,
--                                           TPM_CRB_ADDR_SIZE, AML_READ_WRITE));
--        aml_append(dev, aml_name_decl("_CRS", crs));
--
--        aml_append(dev, aml_name_decl("_STA", aml_int(0xf)));
--        aml_append(dev, aml_name_decl("_UID", aml_int(1)));
--
--        tpm_build_ppi_acpi(tpm, dev);
--
--        aml_append(sb_scope, dev);
--    }
--#endif
--
-     if (pcms->sgx_epc.size != 0) {
-         uint64_t epc_base = pcms->sgx_epc.base;
-         uint64_t epc_size = pcms->sgx_epc.size;
-diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
-index 6144081d30..14feb9857f 100644
---- a/hw/tpm/tpm_crb.c
-+++ b/hw/tpm/tpm_crb.c
-@@ -19,6 +19,8 @@
- #include "qemu/module.h"
- #include "qapi/error.h"
- #include "exec/address-spaces.h"
-+#include "hw/acpi/acpi_aml_interface.h"
-+#include "hw/acpi/tpm.h"
- #include "hw/qdev-properties.h"
- #include "hw/pci/pci_ids.h"
- #include "hw/acpi/tpm.h"
-@@ -116,10 +118,34 @@ static void tpm_crb_isa_realize(DeviceState *dev, Error **errp)
-     }
- }
- 
-+static void build_tpm_crb_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
-+{
-+    Aml *dev, *crs;
-+    CRBState *s = CRB(adev);
-+    TPMIf *ti = TPM_IF(s);
-+
-+    dev = aml_device("TPM");
-+    if (tpm_crb_isa_get_version(ti) == TPM_VERSION_2_0) {
-+        aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
-+        aml_append(dev, aml_name_decl("_STR", aml_string("TPM 2.0 Device")));
-+    } else {
-+        aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C31")));
-+    }
-+    aml_append(dev, aml_name_decl("_UID", aml_int(1)));
-+    aml_append(dev, aml_name_decl("_STA", aml_int(0xF)));
-+    crs = aml_resource_template();
-+    aml_append(crs, aml_memory32_fixed(TPM_CRB_ADDR_BASE, TPM_CRB_ADDR_SIZE,
-+                                      AML_READ_WRITE));
-+    aml_append(dev, aml_name_decl("_CRS", crs));
-+    tpm_build_ppi_acpi(ti, dev);
-+    aml_append(scope, dev);
-+}
-+
- static void tpm_crb_isa_class_init(ObjectClass *klass, void *data)
- {
-     DeviceClass *dc = DEVICE_CLASS(klass);
-     TPMIfClass *tc = TPM_IF_CLASS(klass);
-+    AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass);
- 
-     dc->realize = tpm_crb_isa_realize;
-     device_class_set_props(dc, tpm_crb_isa_properties);
-@@ -128,6 +154,7 @@ static void tpm_crb_isa_class_init(ObjectClass *klass, void *data)
-     tc->model = TPM_MODEL_TPM_CRB;
-     tc->get_version = tpm_crb_isa_get_version;
-     tc->request_completed = tpm_crb_isa_request_completed;
-+    adevc->build_dev_aml = build_tpm_crb_isa_aml;
- 
-     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
- }
-@@ -139,6 +166,7 @@ static const TypeInfo tpm_crb_isa_info = {
-     .class_init  = tpm_crb_isa_class_init,
-     .interfaces = (InterfaceInfo[]) {
-         { TYPE_TPM_IF },
-+        { TYPE_ACPI_DEV_AML_IF },
-         { }
-     }
- };
--- 
-2.39.2 (Apple Git-143)
-
-From b5cc253f1b169809899870d6ccbae8a1e90b9418 Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Wed, 12 Jul 2023 17:24:09 -0700
-Subject: [PATCH 07/12] hw/arm/virt: add plug handler for TPM on SysBus
-
-TPM needs to know its own base address in order to generate its DSDT
-device entry.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- hw/arm/virt.c | 36 ++++++++++++++++++++++++++++++++++++
- 1 file changed, 36 insertions(+)
-
-diff --git a/hw/arm/virt.c b/hw/arm/virt.c
-index 6ab8f7f16c..bd34cefbb4 100644
---- a/hw/arm/virt.c
-+++ b/hw/arm/virt.c
-@@ -2802,6 +2802,36 @@ static void virt_virtio_md_pci_unplug_request(HotplugHandler *hotplug_dev,
-     error_setg(errp, "virtio based memory devices cannot be unplugged.");
- }
- 
-+#ifdef CONFIG_TPM
-+static void virt_tpm_plug(VirtMachineState *vms, TPMIf *tpmif)
-+{
-+    PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
-+    hwaddr pbus_base = vms->memmap[VIRT_PLATFORM_BUS].base;
-+    SysBusDevice *sbdev = SYS_BUS_DEVICE(tpmif);
-+    MemoryRegion *sbdev_mr;
-+    hwaddr tpm_base;
-+    uint64_t tpm_size;
-+
-+    if (!sbdev || !object_dynamic_cast(OBJECT(sbdev), TYPE_SYS_BUS_DEVICE)) {
-+        return;
-+    }
-+
-+    tpm_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
-+    assert(tpm_base != -1);
-+
-+    tpm_base += pbus_base;
-+
-+    sbdev_mr = sysbus_mmio_get_region(sbdev, 0);
-+    tpm_size = memory_region_size(sbdev_mr);
-+
-+    if (object_property_find(OBJECT(sbdev), "baseaddr")) {
-+        object_property_set_uint(OBJECT(sbdev), "baseaddr", tpm_base, NULL);
-+    }
-+    if (object_property_find(OBJECT(sbdev), "size")) {
-+        object_property_set_uint(OBJECT(sbdev), "size", tpm_size, NULL);
-+    }
-+}
-+#endif
- 
- static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
-                                             DeviceState *dev, Error **errp)
-@@ -2875,6 +2905,12 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
-         vms->virtio_iommu_bdf = pci_get_bdf(pdev);
-         create_virtio_iommu_dt_bindings(vms);
-     }
-+
-+#ifdef CONFIG_TPM
-+    if (object_dynamic_cast(OBJECT(dev), TYPE_TPM_IF)) {
-+        virt_tpm_plug(vms, TPM_IF(dev));
-+    }
-+#endif
- }
- 
- static void virt_dimm_unplug_request(HotplugHandler *hotplug_dev,
--- 
-2.39.2 (Apple Git-143)
-
-From eaf1c0e30560ef54ef34d94f6b249d636bcfca1c Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Wed, 12 Jul 2023 17:33:11 -0700
-Subject: [PATCH 08/12] hw/loongarch/virt: add plug handler for TPM on SysBus
-
-TPM needs to know its own base address in order to generate its DSDT
-device entry.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- hw/loongarch/virt.c | 37 +++++++++++++++++++++++++++++++++++++
- 1 file changed, 37 insertions(+)
-
-diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
-index f4bf14c1c8..5f49f47917 100644
---- a/hw/loongarch/virt.c
-+++ b/hw/loongarch/virt.c
-@@ -988,6 +988,37 @@ static void virt_mem_plug(HotplugHandler *hotplug_dev,
-                          dev, &error_abort);
- }
- 
-+#ifdef CONFIG_TPM
-+static void virt_tpm_plug(LoongArchMachineState *lams, TPMIf *tpmif)
-+{
-+    PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(lams->platform_bus_dev);
-+    hwaddr pbus_base = VIRT_PLATFORM_BUS_BASEADDRESS;
-+    SysBusDevice *sbdev = SYS_BUS_DEVICE(tpmif);
-+    MemoryRegion *sbdev_mr;
-+    hwaddr tpm_base;
-+    uint64_t tpm_size;
-+
-+    if (!sbdev || !object_dynamic_cast(OBJECT(sbdev), TYPE_SYS_BUS_DEVICE)) {
-+        return;
-+    }
-+
-+    tpm_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
-+    assert(tpm_base != -1);
-+
-+    tpm_base += pbus_base;
-+
-+    sbdev_mr = sysbus_mmio_get_region(sbdev, 0);
-+    tpm_size = memory_region_size(sbdev_mr);
-+
-+    if (object_property_find(OBJECT(sbdev), "baseaddr")) {
-+        object_property_set_uint(OBJECT(sbdev), "baseaddr", tpm_base, NULL);
-+    }
-+    if (object_property_find(OBJECT(sbdev), "size")) {
-+        object_property_set_uint(OBJECT(sbdev), "size", tpm_size, NULL);
-+    }
-+}
-+#endif
-+
- static void loongarch_machine_device_plug_cb(HotplugHandler *hotplug_dev,
-                                         DeviceState *dev, Error **errp)
- {
-@@ -1002,6 +1033,12 @@ static void loongarch_machine_device_plug_cb(HotplugHandler *hotplug_dev,
-     } else if (memhp_type_supported(dev)) {
-         virt_mem_plug(hotplug_dev, dev, errp);
-     }
-+
-+#ifdef CONFIG_TPM
-+    if (object_dynamic_cast(OBJECT(dev), TYPE_TPM_IF)) {
-+        virt_tpm_plug(lams, TPM_IF(dev));
-+    }
-+#endif
- }
- 
- static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
--- 
-2.39.2 (Apple Git-143)
-
-From a91f8ccc906521d39e0b5acacee99ebddd4c0c93 Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Wed, 12 Jul 2023 17:37:11 -0700
-Subject: [PATCH 09/12] tpm_tis_sysbus: fix crash when PPI is enabled
-
-If 'ppi' property is set, then `tpm_ppi_reset` is called on reset
-which SEGFAULTs because `tpmppi->buf` is not allocated.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- hw/tpm/tpm_tis_sysbus.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c
-index 45e63efd63..1014d5d993 100644
---- a/hw/tpm/tpm_tis_sysbus.c
-+++ b/hw/tpm/tpm_tis_sysbus.c
-@@ -124,6 +124,10 @@ static void tpm_tis_sysbus_realizefn(DeviceState *dev, Error **errp)
-         error_setg(errp, "'tpmdev' property is required");
-         return;
-     }
-+
-+    if (s->ppi_enabled) {
-+        sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->ppi.ram);
-+    }
- }
- 
- static void tpm_tis_sysbus_class_init(ObjectClass *klass, void *data)
--- 
-2.39.2 (Apple Git-143)
-
-From cdae40315783a593d597df5df9e6cb06e41d3734 Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Wed, 12 Jul 2023 18:30:14 -0700
-Subject: [PATCH 10/12] tpm_tis_sysbus: move DSDT AML generation to device
-
-This reduces redundant code in different machine types with ACPI table
-generation. Additionally, this will allow us to support multiple TPM
-interfaces. Finally, this matches up with the TPM TIS ISA
-implementation.
-
-Ideally, we would be able to call `qbus_build_aml` and avoid any TPM
-specific code in the ACPI table generation. However, currently we
-still have to call `build_tpm2` anyways and it does not look like
-most other ACPI devices support the `ACPI_DEV_AML_IF` interface.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- hw/arm/virt-acpi-build.c  | 38 ++------------------------------------
- hw/loongarch/acpi-build.c | 38 ++------------------------------------
- hw/tpm/tpm_tis_sysbus.c   | 39 +++++++++++++++++++++++++++++++++++++++
- 3 files changed, 43 insertions(+), 72 deletions(-)
-
-diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
-index 4156111d49..c1d2ec6822 100644
---- a/hw/arm/virt-acpi-build.c
-+++ b/hw/arm/virt-acpi-build.c
-@@ -34,6 +34,7 @@
- #include "target/arm/cpu.h"
- #include "hw/acpi/acpi-defs.h"
- #include "hw/acpi/acpi.h"
-+#include "hw/acpi/acpi_aml_interface.h"
- #include "hw/nvram/fw_cfg.h"
- #include "hw/acpi/bios-linker-loader.h"
- #include "hw/acpi/aml-build.h"
-@@ -207,41 +208,6 @@ static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap,
-     aml_append(scope, dev);
- }
- 
--#ifdef CONFIG_TPM
--static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms)
--{
--    PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
--    hwaddr pbus_base = vms->memmap[VIRT_PLATFORM_BUS].base;
--    SysBusDevice *sbdev = SYS_BUS_DEVICE(tpm_find());
--    MemoryRegion *sbdev_mr;
--    hwaddr tpm_base;
--
--    if (!sbdev) {
--        return;
--    }
--
--    tpm_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
--    assert(tpm_base != -1);
--
--    tpm_base += pbus_base;
--
--    sbdev_mr = sysbus_mmio_get_region(sbdev, 0);
--
--    Aml *dev = aml_device("TPM0");
--    aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
--    aml_append(dev, aml_name_decl("_STR", aml_string("TPM 2.0 Device")));
--    aml_append(dev, aml_name_decl("_UID", aml_int(0)));
--
--    Aml *crs = aml_resource_template();
--    aml_append(crs,
--               aml_memory32_fixed(tpm_base,
--                                  (uint32_t)memory_region_size(sbdev_mr),
--                                  AML_READ_WRITE));
--    aml_append(dev, aml_name_decl("_CRS", crs));
--    aml_append(scope, dev);
--}
--#endif
--
- #define ID_MAPPING_ENTRY_SIZE 20
- #define SMMU_V3_ENTRY_SIZE 68
- #define ROOT_COMPLEX_ENTRY_SIZE 36
-@@ -890,7 +856,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
- 
-     acpi_dsdt_add_power_button(scope);
- #ifdef CONFIG_TPM
--    acpi_dsdt_add_tpm(scope, vms);
-+    call_dev_aml_func(DEVICE(tpm_find()), scope);
- #endif
- 
-     aml_append(dsdt, scope);
-diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
-index 8e3ce07367..3a557e31ba 100644
---- a/hw/loongarch/acpi-build.c
-+++ b/hw/loongarch/acpi-build.c
-@@ -14,6 +14,7 @@
- #include "target/loongarch/cpu.h"
- #include "hw/acpi/acpi-defs.h"
- #include "hw/acpi/acpi.h"
-+#include "hw/acpi/acpi_aml_interface.h"
- #include "hw/nvram/fw_cfg.h"
- #include "hw/acpi/bios-linker-loader.h"
- #include "migration/vmstate.h"
-@@ -297,41 +298,6 @@ static void build_flash_aml(Aml *scope, LoongArchMachineState *lams)
-     aml_append(scope, dev);
- }
- 
--#ifdef CONFIG_TPM
--static void acpi_dsdt_add_tpm(Aml *scope, LoongArchMachineState *vms)
--{
--    PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
--    hwaddr pbus_base = VIRT_PLATFORM_BUS_BASEADDRESS;
--    SysBusDevice *sbdev = SYS_BUS_DEVICE(tpm_find());
--    MemoryRegion *sbdev_mr;
--    hwaddr tpm_base;
--
--    if (!sbdev) {
--        return;
--    }
--
--    tpm_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
--    assert(tpm_base != -1);
--
--    tpm_base += pbus_base;
--
--    sbdev_mr = sysbus_mmio_get_region(sbdev, 0);
--
--    Aml *dev = aml_device("TPM0");
--    aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
--    aml_append(dev, aml_name_decl("_STR", aml_string("TPM 2.0 Device")));
--    aml_append(dev, aml_name_decl("_UID", aml_int(0)));
--
--    Aml *crs = aml_resource_template();
--    aml_append(crs,
--               aml_memory32_fixed(tpm_base,
--                                  (uint32_t)memory_region_size(sbdev_mr),
--                                  AML_READ_WRITE));
--    aml_append(dev, aml_name_decl("_CRS", crs));
--    aml_append(scope, dev);
--}
--#endif
--
- /* build DSDT */
- static void
- build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine)
-@@ -348,7 +314,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine)
-     build_la_ged_aml(dsdt, machine);
-     build_flash_aml(dsdt, lams);
- #ifdef CONFIG_TPM
--    acpi_dsdt_add_tpm(dsdt, lams);
-+    call_dev_aml_func(DEVICE(tpm_find()), dsdt);
- #endif
-     /* System State Package */
-     scope = aml_scope("\\");
-diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c
-index 1014d5d993..a00f1a0105 100644
---- a/hw/tpm/tpm_tis_sysbus.c
-+++ b/hw/tpm/tpm_tis_sysbus.c
-@@ -30,6 +30,7 @@
- #include "hw/sysbus.h"
- #include "tpm_tis.h"
- #include "qom/object.h"
-+#include "hw/acpi/acpi_aml_interface.h"
- 
- struct TPMStateSysBus {
-     /*< private >*/
-@@ -37,6 +38,8 @@ struct TPMStateSysBus {
- 
-     /*< public >*/
-     TPMState state; /* not a QOM object */
-+    uint64_t baseaddr;
-+    uint64_t size;
- };
- 
- OBJECT_DECLARE_SIMPLE_TYPE(TPMStateSysBus, TPM_TIS_SYSBUS)
-@@ -94,6 +97,8 @@ static Property tpm_tis_sysbus_properties[] = {
-     DEFINE_PROP_UINT32("irq", TPMStateSysBus, state.irq_num, TPM_TIS_IRQ),
-     DEFINE_PROP_TPMBE("tpmdev", TPMStateSysBus, state.be_driver),
-     DEFINE_PROP_BOOL("ppi", TPMStateSysBus, state.ppi_enabled, false),
-+    DEFINE_PROP_UINT64("baseaddr", TPMStateSysBus, baseaddr, TPM_TIS_ADDR_BASE),
-+    DEFINE_PROP_UINT64("size", TPMStateSysBus, size, TPM_TIS_ADDR_SIZE),
-     DEFINE_PROP_END_OF_LIST(),
- };
- 
-@@ -130,10 +135,42 @@ static void tpm_tis_sysbus_realizefn(DeviceState *dev, Error **errp)
-     }
- }
- 
-+static void build_tpm_tis_sysbus_aml(AcpiDevAmlIf *adev, Aml *scope)
-+{
-+    Aml *dev, *crs;
-+    TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(adev);
-+    TPMIf *ti = TPM_IF(sbdev);
-+
-+    dev = aml_device("TPM");
-+    if (tpm_tis_sysbus_get_tpm_version(ti) == TPM_VERSION_2_0) {
-+        aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
-+        aml_append(dev, aml_name_decl("_STR", aml_string("TPM 2.0 Device")));
-+    } else {
-+        aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C31")));
-+    }
-+    aml_append(dev, aml_name_decl("_UID", aml_int(1)));
-+    aml_append(dev, aml_name_decl("_STA", aml_int(0xF)));
-+    crs = aml_resource_template();
-+    aml_append(crs, aml_memory32_fixed(sbdev->baseaddr, sbdev->size,
-+                                      AML_READ_WRITE));
-+    /*
-+     * FIXME: TPM_TIS_IRQ=5 conflicts with PNP0C0F irqs,
-+     * fix default TPM_TIS_IRQ value there to use some unused IRQ
-+     */
-+    /* aml_append(crs, aml_irq_no_flags(sbdev->state.irq_num)); */
-+    aml_append(dev, aml_name_decl("_CRS", crs));
-+    /**
-+     * FIXME: PPI needs to also get a dynamic address.
-+     */
-+    /* tpm_build_ppi_acpi(ti, dev); */
-+    aml_append(scope, dev);
-+}
-+
- static void tpm_tis_sysbus_class_init(ObjectClass *klass, void *data)
- {
-     DeviceClass *dc = DEVICE_CLASS(klass);
-     TPMIfClass *tc = TPM_IF_CLASS(klass);
-+    AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass);
- 
-     device_class_set_props(dc, tpm_tis_sysbus_properties);
-     dc->vmsd  = &vmstate_tpm_tis_sysbus;
-@@ -144,6 +181,7 @@ static void tpm_tis_sysbus_class_init(ObjectClass *klass, void *data)
-     tc->request_completed = tpm_tis_sysbus_request_completed;
-     tc->get_version = tpm_tis_sysbus_get_tpm_version;
-     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-+    adevc->build_dev_aml = build_tpm_tis_sysbus_aml;
- }
- 
- static const TypeInfo tpm_tis_sysbus_info = {
-@@ -154,6 +192,7 @@ static const TypeInfo tpm_tis_sysbus_info = {
-     .class_init  = tpm_tis_sysbus_class_init,
-     .interfaces = (InterfaceInfo[]) {
-         { TYPE_TPM_IF },
-+        { TYPE_ACPI_DEV_AML_IF },
-         { }
-     }
- };
--- 
-2.39.2 (Apple Git-143)
-
-From 64930d62565e47ad432b2c6a9320b62bbd798341 Mon Sep 17 00:00:00 2001
-From: Joelle van Dyne <j@getutm.app>
-Date: Wed, 12 Jul 2023 18:41:07 -0700
-Subject: [PATCH 11/12] tpm_crb_sysbus: introduce TPM CRB SysBus device
-
-This SysBus variant of the CRB interface supports dynamically locating
-the MMIO interface so that Virt machines can use it. This interface
-is currently the only one supported by QEMU that works on Windows 11
-ARM64. We largely follow the TPM TIS SysBus device as a template.
-
-Signed-off-by: Joelle van Dyne <j@getutm.app>
----
- docs/specs/tpm.rst          |   1 +
- hw/acpi/aml-build.c         |   7 +-
- hw/arm/Kconfig              |   1 +
- hw/arm/virt.c               |   1 +
- hw/core/sysbus-fdt.c        |   1 +
- hw/loongarch/virt.c         |   1 +
- hw/riscv/Kconfig            |   1 +
- hw/riscv/virt.c             |   1 +
- hw/tpm/Kconfig              |   5 +
- hw/tpm/meson.build          |   2 +
- hw/tpm/tpm_crb_common.c     |   2 +-
- hw/tpm/tpm_crb_sysbus.c     | 178 ++++++++++++++++++++++++++++++++++++
- include/hw/acpi/aml-build.h |   1 +
- include/sysemu/tpm.h        |   3 +
- 14 files changed, 203 insertions(+), 2 deletions(-)
- create mode 100644 hw/tpm/tpm_crb_sysbus.c
-
-diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst
-index ab79da0ecb..a815b868bb 100644
---- a/docs/specs/tpm.rst
-+++ b/docs/specs/tpm.rst
-@@ -42,6 +42,7 @@ operating system.
- QEMU files related to TPM CRB interface:
-  - ``hw/tpm/tpm_crb.c``
-  - ``hw/tpm/tpm_crb_common.c``
-+ - ``hw/tpm/tpm_crb_sysbus.c``
- 
- SPAPR interface
- ---------------
-diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
-index ea331a20d1..f809137fc9 100644
---- a/hw/acpi/aml-build.c
-+++ b/hw/acpi/aml-build.c
-@@ -31,6 +31,7 @@
- #include "hw/pci/pci_bus.h"
- #include "hw/pci/pci_bridge.h"
- #include "qemu/cutils.h"
-+#include "qom/object.h"
- 
- static GArray *build_alloc_array(void)
- {
-@@ -2218,7 +2219,7 @@ void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog,
- {
-     uint8_t start_method_params[12] = {};
-     unsigned log_addr_offset;
--    uint64_t control_area_start_address;
-+    uint64_t baseaddr, control_area_start_address;
-     TPMIf *tpmif = tpm_find();
-     uint32_t start_method;
-     AcpiTable table = { .sig = "TPM2", .rev = 4,
-@@ -2236,6 +2237,10 @@ void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog,
-     } else if (TPM_IS_CRB(tpmif)) {
-         control_area_start_address = TPM_CRB_ADDR_CTRL;
-         start_method = TPM2_START_METHOD_CRB;
-+    } else if (TPM_IS_CRB_SYSBUS(tpmif)) {
-+        baseaddr = object_property_get_uint(OBJECT(tpmif), "baseaddr", NULL);
-+        control_area_start_address = baseaddr + A_CRB_CTRL_REQ;
-+        start_method = TPM2_START_METHOD_CRB;
-     } else {
-         g_assert_not_reached();
-     }
-diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
-index b5aed4aff5..08b3fdf37a 100644
---- a/hw/arm/Kconfig
-+++ b/hw/arm/Kconfig
-@@ -5,6 +5,7 @@ config ARM_VIRT
-     imply VFIO_AMD_XGBE
-     imply VFIO_PLATFORM
-     imply VFIO_XGMAC
-+    imply TPM_CRB_SYSBUS
-     imply TPM_TIS_SYSBUS
-     imply NVDIMM
-     select ARM_GIC
-diff --git a/hw/arm/virt.c b/hw/arm/virt.c
-index bd34cefbb4..51536e1553 100644
---- a/hw/arm/virt.c
-+++ b/hw/arm/virt.c
-@@ -3045,6 +3045,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
-     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
- #ifdef CONFIG_TPM
-     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
-+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_CRB_SYSBUS);
- #endif
-     mc->block_default_type = IF_VIRTIO;
-     mc->no_cdrom = 1;
-diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
-index eebcd28f9a..9c783f88eb 100644
---- a/hw/core/sysbus-fdt.c
-+++ b/hw/core/sysbus-fdt.c
-@@ -493,6 +493,7 @@ static const BindingEntry bindings[] = {
- #endif
- #ifdef CONFIG_TPM
-     TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node),
-+    TYPE_BINDING(TYPE_TPM_CRB_SYSBUS, no_fdt_node),
- #endif
-     TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
-     TYPE_BINDING("", NULL), /* last element */
-diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
-index 5f49f47917..4d0ee7f11e 100644
---- a/hw/loongarch/virt.c
-+++ b/hw/loongarch/virt.c
-@@ -1083,6 +1083,7 @@ static void loongarch_class_init(ObjectClass *oc, void *data)
-     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
- #ifdef CONFIG_TPM
-     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
-+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_CRB_SYSBUS);
- #endif
- }
- 
-diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
-index 6528ebfa3a..a43ba022e1 100644
---- a/hw/riscv/Kconfig
-+++ b/hw/riscv/Kconfig
-@@ -29,6 +29,7 @@ config RISCV_VIRT
-     imply PCI_DEVICES
-     imply VIRTIO_VGA
-     imply TEST_DEVICES
-+    imply TPM_CRB_SYSBUS
-     imply TPM_TIS_SYSBUS
-     select RISCV_NUMA
-     select GOLDFISH_RTC
-diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
-index 4e3efbee16..8385234c64 100644
---- a/hw/riscv/virt.c
-+++ b/hw/riscv/virt.c
-@@ -1687,6 +1687,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
-     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
- #ifdef CONFIG_TPM
-     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
-+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_CRB_SYSBUS);
- #endif
- 
-     object_class_property_add_bool(oc, "aclint", virt_get_aclint,
-diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig
-index d75319e8e5..8347fdca4a 100644
---- a/hw/tpm/Kconfig
-+++ b/hw/tpm/Kconfig
-@@ -18,6 +18,11 @@ config TPM_CRB
-     depends on TPM && ISA_BUS
-     select TPM_BACKEND
- 
-+config TPM_CRB_SYSBUS
-+    bool
-+    depends on TPM
-+    select TPM_BACKEND
-+
- config TPM_SPAPR
-     bool
-     default y
-diff --git a/hw/tpm/meson.build b/hw/tpm/meson.build
-index 822d3a11ca..fc99cde1a6 100644
---- a/hw/tpm/meson.build
-+++ b/hw/tpm/meson.build
-@@ -3,6 +3,8 @@ softmmu_ss.add(when: 'CONFIG_TPM_TIS_ISA', if_true: files('tpm_tis_isa.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_TIS_SYSBUS', if_true: files('tpm_tis_sysbus.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_crb.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_crb_common.c'))
-+softmmu_ss.add(when: 'CONFIG_TPM_CRB_SYSBUS', if_true: files('tpm_crb_sysbus.c'))
-+softmmu_ss.add(when: 'CONFIG_TPM_CRB_SYSBUS', if_true: files('tpm_crb_common.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_TIS', if_true: files('tpm_ppi.c'))
- softmmu_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_ppi.c'))
- 
-diff --git a/hw/tpm/tpm_crb_common.c b/hw/tpm/tpm_crb_common.c
-index 772ddd9bb1..a7600e3aa1 100644
---- a/hw/tpm/tpm_crb_common.c
-+++ b/hw/tpm/tpm_crb_common.c
-@@ -219,7 +219,7 @@ void tpm_crb_reset(TPMCRBState *s, uint64_t baseaddr)
- void tpm_crb_init_memory(Object *obj, TPMCRBState *s, Error **errp)
- {
-     memory_region_init_rom_device(&s->mmio, obj, &tpm_crb_memory_ops, s,
--        "tpm-crb-mmio", TPM_CRB_ADDR_SIZE, errp);
-+        "tpm-crb-mmio", ROUND_UP(TPM_CRB_ADDR_SIZE, qemu_host_page_size), errp);
-     if (s->ppi_enabled) {
-         tpm_ppi_init_memory(&s->ppi, obj);
-     }
-diff --git a/hw/tpm/tpm_crb_sysbus.c b/hw/tpm/tpm_crb_sysbus.c
-new file mode 100644
-index 0000000000..1289afcc7e
---- /dev/null
-+++ b/hw/tpm/tpm_crb_sysbus.c
-@@ -0,0 +1,178 @@
-+/*
-+ * tpm_crb_sysbus.c - QEMU's TPM CRB interface emulator
-+ *
-+ * Copyright (c) 2018 Red Hat, Inc.
-+ *
-+ * Authors:
-+ *   Marc-André Lureau <marcandre.lureau@redhat.com>
-+ *
-+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
-+ * See the COPYING file in the top-level directory.
-+ *
-+ * tpm_crb is a device for TPM 2.0 Command Response Buffer (CRB) Interface
-+ * as defined in TCG PC Client Platform TPM Profile (PTP) Specification
-+ * Family “2.0” Level 00 Revision 01.03 v22
-+ */
-+
-+#include "qemu/osdep.h"
-+#include "hw/acpi/acpi_aml_interface.h"
-+#include "hw/acpi/tpm.h"
-+#include "hw/qdev-properties.h"
-+#include "migration/vmstate.h"
-+#include "tpm_prop.h"
-+#include "hw/pci/pci_ids.h"
-+#include "hw/sysbus.h"
-+#include "qapi/visitor.h"
-+#include "qom/object.h"
-+#include "sysemu/tpm_util.h"
-+#include "trace.h"
-+#include "tpm_crb.h"
-+
-+struct TPMCRBStateSysBus {
-+    /*< private >*/
-+    SysBusDevice parent_obj;
-+
-+    /*< public >*/
-+    TPMCRBState state;
-+    uint64_t baseaddr;
-+    uint64_t size;
-+};
-+
-+OBJECT_DECLARE_SIMPLE_TYPE(TPMCRBStateSysBus, TPM_CRB_SYSBUS)
-+
-+static void tpm_crb_sysbus_request_completed(TPMIf *ti, int ret)
-+{
-+    TPMCRBStateSysBus *s = TPM_CRB_SYSBUS(ti);
-+
-+    return tpm_crb_request_completed(&s->state, ret);
-+}
-+
-+static enum TPMVersion tpm_crb_sysbus_get_tpm_version(TPMIf *ti)
-+{
-+    TPMCRBStateSysBus *s = TPM_CRB_SYSBUS(ti);
-+
-+    return tpm_crb_get_version(&s->state);
-+}
-+
-+static int tpm_crb_sysbus_pre_save(void *opaque)
-+{
-+    TPMCRBStateSysBus *s = opaque;
-+
-+    return tpm_crb_pre_save(&s->state);
-+}
-+
-+static const VMStateDescription vmstate_tpm_crb_sysbus = {
-+    .name = "tpm-crb-sysbus",
-+    .pre_save = tpm_crb_sysbus_pre_save,
-+    .fields = (VMStateField[]) {
-+        VMSTATE_END_OF_LIST(),
-+    }
-+};
-+
-+static Property tpm_crb_sysbus_properties[] = {
-+    DEFINE_PROP_TPMBE("tpmdev", TPMCRBStateSysBus, state.tpmbe),
-+    DEFINE_PROP_BOOL("ppi", TPMCRBStateSysBus, state.ppi_enabled, false),
-+    DEFINE_PROP_UINT64("baseaddr", TPMCRBStateSysBus,
-+                       baseaddr, TPM_CRB_ADDR_BASE),
-+    DEFINE_PROP_UINT64("size", TPMCRBStateSysBus, size, TPM_CRB_ADDR_SIZE),
-+    DEFINE_PROP_END_OF_LIST(),
-+};
-+
-+static void tpm_crb_sysbus_initfn(Object *obj)
-+{
-+    TPMCRBStateSysBus *s = TPM_CRB_SYSBUS(obj);
-+
-+    tpm_crb_init_memory(obj, &s->state, NULL);
-+
-+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->state.mmio);
-+}
-+
-+static void tpm_crb_sysbus_reset(DeviceState *dev)
-+{
-+    TPMCRBStateSysBus *s = TPM_CRB_SYSBUS(dev);
-+
-+    return tpm_crb_reset(&s->state, s->baseaddr);
-+}
-+
-+static void tpm_crb_sysbus_realizefn(DeviceState *dev, Error **errp)
-+{
-+    TPMCRBStateSysBus *s = TPM_CRB_SYSBUS(dev);
-+
-+    if (!tpm_find()) {
-+        error_setg(errp, "at most one TPM device is permitted");
-+        return;
-+    }
-+
-+    if (!s->state.tpmbe) {
-+        error_setg(errp, "'tpmdev' property is required");
-+        return;
-+    }
-+
-+    if (s->state.ppi_enabled) {
-+        sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->state.ppi.ram);
-+    }
-+}
-+
-+static void build_tpm_crb_sysbus_aml(AcpiDevAmlIf *adev, Aml *scope)
-+{
-+    Aml *dev, *crs;
-+    TPMCRBStateSysBus *s = TPM_CRB_SYSBUS(adev);
-+    TPMIf *ti = TPM_IF(s);
-+
-+    dev = aml_device("TPM");
-+    if (tpm_crb_sysbus_get_tpm_version(ti) == TPM_VERSION_2_0) {
-+        aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
-+        aml_append(dev, aml_name_decl("_STR", aml_string("TPM 2.0 Device")));
-+    } else {
-+        aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C31")));
-+    }
-+    aml_append(dev, aml_name_decl("_UID", aml_int(1)));
-+    aml_append(dev, aml_name_decl("_STA", aml_int(0xF)));
-+    crs = aml_resource_template();
-+    aml_append(crs, aml_memory32_fixed(s->baseaddr, s->size,
-+                                      AML_READ_WRITE));
-+    aml_append(dev, aml_name_decl("_CRS", crs));
-+    /**
-+     * FIXME: PPI needs to also get a dynamic address.
-+     */
-+    /* tpm_build_ppi_acpi(ti, dev); */
-+    aml_append(scope, dev);
-+}
-+
-+static void tpm_crb_sysbus_class_init(ObjectClass *klass, void *data)
-+{
-+    DeviceClass *dc = DEVICE_CLASS(klass);
-+    TPMIfClass *tc = TPM_IF_CLASS(klass);
-+    AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass);
-+
-+    device_class_set_props(dc, tpm_crb_sysbus_properties);
-+    dc->vmsd  = &vmstate_tpm_crb_sysbus;
-+    tc->model = TPM_MODEL_TPM_CRB;
-+    dc->realize = tpm_crb_sysbus_realizefn;
-+    dc->user_creatable = true;
-+    dc->reset = tpm_crb_sysbus_reset;
-+    tc->request_completed = tpm_crb_sysbus_request_completed;
-+    tc->get_version = tpm_crb_sysbus_get_tpm_version;
-+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-+    adevc->build_dev_aml = build_tpm_crb_sysbus_aml;
-+}
-+
-+static const TypeInfo tpm_crb_sysbus_info = {
-+    .name = TYPE_TPM_CRB_SYSBUS,
-+    .parent = TYPE_SYS_BUS_DEVICE,
-+    .instance_size = sizeof(TPMCRBStateSysBus),
-+    .instance_init = tpm_crb_sysbus_initfn,
-+    .class_init  = tpm_crb_sysbus_class_init,
-+    .interfaces = (InterfaceInfo[]) {
-+        { TYPE_TPM_IF },
-+        { TYPE_ACPI_DEV_AML_IF },
-+        { }
-+    }
-+};
-+
-+static void tpm_crb_sysbus_register(void)
-+{
-+    type_register_static(&tpm_crb_sysbus_info);
-+}
-+
-+type_init(tpm_crb_sysbus_register)
-diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
-index d1fb08514b..9660e16148 100644
---- a/include/hw/acpi/aml-build.h
-+++ b/include/hw/acpi/aml-build.h
-@@ -3,6 +3,7 @@
- 
- #include "hw/acpi/acpi-defs.h"
- #include "hw/acpi/bios-linker-loader.h"
-+#include "exec/hwaddr.h"
- 
- #define ACPI_BUILD_APPNAME6 "BOCHS "
- #define ACPI_BUILD_APPNAME8 "BXPC    "
-diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h
-index fb40e30ff6..ec26f381d2 100644
---- a/include/sysemu/tpm.h
-+++ b/include/sysemu/tpm.h
-@@ -47,6 +47,7 @@ struct TPMIfClass {
- #define TYPE_TPM_TIS_ISA            "tpm-tis"
- #define TYPE_TPM_TIS_SYSBUS         "tpm-tis-device"
- #define TYPE_TPM_CRB                "tpm-crb"
-+#define TYPE_TPM_CRB_SYSBUS         "tpm-crb-device"
- #define TYPE_TPM_SPAPR              "tpm-spapr"
- 
- #define TPM_IS_TIS_ISA(chr)                         \
-@@ -55,6 +56,8 @@ struct TPMIfClass {
-     object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_SYSBUS)
- #define TPM_IS_CRB(chr)                             \
-     object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB)
-+#define TPM_IS_CRB_SYSBUS(chr)                      \
-+    object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB_SYSBUS)
- #define TPM_IS_SPAPR(chr)                           \
-     object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR)
- 
--- 
-2.39.2 (Apple Git-143)
-
-From f1e0d530d3e2892047031c0f37cab5ed6ca2bc85 Mon Sep 17 00:00:00 2001
-From: osy <osy@turing.llc>
-Date: Wed, 12 Jul 2023 23:07:49 -0700
-Subject: [PATCH 12/12] sysbus-fdt: falsely claim TPM CRB device is TPM TIS
-
-OVMF firmware is hard-coded to recognize a TPM TIS device. If we lie,
-the code will properly detect that it is a CRB device. In Linux, this
-will cause the device not to be recognized properly, but the separate
-ACPI device will cause it to work regardless.
----
- hw/core/sysbus-fdt.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
-index 9c783f88eb..4e4253f736 100644
---- a/hw/core/sysbus-fdt.c
-+++ b/hw/core/sysbus-fdt.c
-@@ -493,7 +493,7 @@ static const BindingEntry bindings[] = {
- #endif
- #ifdef CONFIG_TPM
-     TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node),
--    TYPE_BINDING(TYPE_TPM_CRB_SYSBUS, no_fdt_node),
-+    TYPE_BINDING(TYPE_TPM_CRB_SYSBUS, add_tpm_tis_fdt_node),
- #endif
-     TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
-     TYPE_BINDING("", NULL), /* last element */
--- 
-2.39.2 (Apple Git-143)
-
-From 605087192b82ec4fa568707dc87891b163ae3570 Mon Sep 17 00:00:00 2001
-From: osy <osy@turing.llc>
-Date: Sun, 30 Jul 2023 00:42:13 -0700
-Subject: [PATCH] error-report: disable custom log handler
-
-When using QMP over SPICE, the custom log handler causes a deadlock.
----
- util/error-report.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/util/error-report.c b/util/error-report.c
-index 6e44a55732..800dbc30ad 100644
---- a/util/error-report.c
-+++ b/util/error-report.c
-@@ -394,6 +394,7 @@ void error_init(const char *argv0)
-     /* Set the program name for error_print_loc(). */
-     g_set_prgname(p ? p + 1 : argv0);
- 
-+#if 0 /* QEMU log handler disabled to prevent deadlock with SPICE logging */
-     /*
-      * This sets up glib logging so libraries using it also print their logs
-      * through error_report(), warn_report(), info_report().
-@@ -401,4 +402,5 @@ void error_init(const char *argv0)
-     g_log_set_default_handler(qemu_log_func, NULL);
-     g_warn_if_fail(qemu_glog_domains == NULL);
-     qemu_glog_domains = g_strdup(g_getenv("G_MESSAGES_DEBUG"));
-+#endif
- }
--- 
-2.39.2 (Apple Git-143)
-

+ 1 - 1
patches/sources

@@ -23,7 +23,7 @@ SPICE_SERVER_SRC="https://www.spice-space.org/download/releases/spice-server/spi
 USB_SRC="https://github.com/libusb/libusb/releases/download/v1.0.25/libusb-1.0.25.tar.bz2"
 USBREDIR_SRC="https://www.spice-space.org/download/usbredir/usbredir-0.13.0.tar.xz"
 SLIRP_SRC="https://gitlab.freedesktop.org/slirp/libslirp/-/archive/v4.7.0/libslirp-v4.7.0.tar.gz"
-QEMU_SRC="https://github.com/utmapp/qemu/releases/download/v8.0.2-utm/qemu-8.0.2-utm.tar.bz2"
+QEMU_SRC="https://github.com/utmapp/qemu/releases/download/v7.2.0-utm/qemu-7.2.0-utm.tar.bz2"
 
 # Source files for spice-client
 JSON_GLIB_SRC="https://download.gnome.org/sources/json-glib/1.6/json-glib-1.6.6.tar.xz"

部分文件因为文件数量过多而无法显示