@@ -0,0 +1,4569 @@
+From bb5c41eb13130dada2f3cd766da9a537ef466a4b Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Tue, 20 Dec 2022 15:56:07 -0800
+Subject: [PATCH 1/3] spice-display: fix memory leak issues
+
+1) Some of the error cases did not free GL memory.
+2) Remove some unneeded logic and simplify the code.
+3) Add an `eglMakeCurrent` call to `spice_iosurface_destroy` to make sure we are
+   freeing objects in the right context.
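+
+In outline, the destroy path now reads as follows (a condensed sketch of
+the hunks below; the CONFIG_ANGLE guard and the early return are elided):
+
+    static void spice_iosurface_destroy(SimpleSpiceDisplay *ssd)
+    {
+        /* Switch to the surface's own context so the GL objects are
+         * freed in the right context, then release and tear down. */
+        eglMakeCurrent(qemu_egl_display, ssd->esurface, ssd->esurface,
+                       spice_gl_ctx);
+        eglReleaseTexImage(qemu_egl_display, ssd->esurface, EGL_BACK_BUFFER);
+        egl_fb_destroy(&ssd->iosurface_fb);
+        qemu_egl_destroy_surface(ssd->esurface);
+        ssd->esurface = EGL_NO_SURFACE;
+    }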
+---
+ ui/egl-helpers.c | 1 +
+ ui/spice-display.c | 29 ++++++-----------------------
+ 2 files changed, 7 insertions(+), 23 deletions(-)
+
+diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
+index 0df9dd8fd5..a636e5f2f2 100644
+--- a/ui/egl-helpers.c
++++ b/ui/egl-helpers.c
+@@ -389,6 +389,7 @@ EGLSurface qemu_egl_init_buffer_surface(EGLContext ectx,
+ b = eglMakeCurrent(qemu_egl_display, esurface, esurface, ectx);
+ if (b == EGL_FALSE) {
+ error_report("egl: eglMakeCurrent failed");
++ qemu_egl_destroy_surface(esurface);
+ return NULL;
+ }
+
+diff --git a/ui/spice-display.c b/ui/spice-display.c
+index 4e4791484c..c1d2a66fc5 100644
+--- a/ui/spice-display.c
++++ b/ui/spice-display.c
+@@ -847,7 +847,7 @@ static int spice_iosurface_create(SimpleSpiceDisplay *ssd, int width, int height
+ EGL_BIND_TO_TEXTURE_TARGET_ANGLE,
+ &target) != EGL_TRUE) {
+ error_report("spice_iosurface_create: eglGetConfigAttrib failed");
+- return 0;
++ goto gl_error;
+ }
+ if (target == EGL_TEXTURE_2D) {
+ tex_target = GL_TEXTURE_2D;
+@@ -855,7 +855,7 @@ static int spice_iosurface_create(SimpleSpiceDisplay *ssd, int width, int height
+ tex_target = GL_TEXTURE_RECTANGLE_ANGLE;
+ } else {
+ error_report("spice_iosurface_create: unsupported texture target");
+- return 0;
++ goto gl_error;
+ }
+
+ const EGLint attribs[] = {
+@@ -880,6 +880,8 @@ static int spice_iosurface_create(SimpleSpiceDisplay *ssd, int width, int height
+
+ egl_fb_setup_new_tex_target(&ssd->iosurface_fb, width, height, tex_target);
+
++ eglBindTexImage(qemu_egl_display, ssd->esurface, EGL_BACK_BUFFER);
++
+ return 1;
+ gl_error:
+ CFRelease(ssd->iosurface);
+@@ -897,6 +899,8 @@ static void spice_iosurface_destroy(SimpleSpiceDisplay *ssd)
+ return;
+ }
+ #if defined(CONFIG_ANGLE)
++ eglMakeCurrent(qemu_egl_display, ssd->esurface, ssd->esurface, spice_gl_ctx);
++ eglReleaseTexImage(qemu_egl_display, ssd->esurface, EGL_BACK_BUFFER);
+ egl_fb_destroy(&ssd->iosurface_fb);
+ qemu_egl_destroy_surface(ssd->esurface);
+ ssd->esurface = EGL_NO_SURFACE;
+@@ -963,23 +967,10 @@ static void spice_iosurface_blit(SimpleSpiceDisplay *ssd, GLuint src_texture, bo
+ #if defined(CONFIG_ANGLE)
+ eglMakeCurrent(qemu_egl_display, ssd->esurface, ssd->esurface, spice_gl_ctx);
+ glBindTexture(ssd->iosurface_fb.texture_target, ssd->iosurface_fb.texture);
+- eglBindTexImage(qemu_egl_display, ssd->esurface, EGL_BACK_BUFFER);
+ egl_texture_blit(ssd->gls, &ssd->iosurface_fb, &tmp_fb, flip, swap);
+ #endif
+ }
+
+-static void spice_iosurface_flush(SimpleSpiceDisplay *ssd)
+-{
+- if (!ssd->iosurface) {
+- return;
+- }
+-
+-#if defined(CONFIG_ANGLE)
+- eglMakeCurrent(qemu_egl_display, ssd->esurface, ssd->esurface, spice_gl_ctx);
+- eglReleaseTexImage(qemu_egl_display, ssd->esurface, EGL_BACK_BUFFER);
+-#endif
+-}
+-
+ #endif
+
+ static void qemu_spice_gl_monitor_config(SimpleSpiceDisplay *ssd,
+@@ -1043,9 +1034,6 @@ static void spice_gl_refresh(DisplayChangeListener *dcl)
+ graphic_hw_update(dcl->con);
+ if (ssd->gl_updates && ssd->have_surface) {
+ qemu_spice_gl_block(ssd, true);
+-#if defined(CONFIG_IOSURFACE)
+- spice_iosurface_flush(ssd);
+-#endif
+ glFlush();
+ cookie = (uintptr_t)qxl_cookie_new(QXL_COOKIE_TYPE_GL_DRAW_DONE, 0);
+ spice_qxl_gl_draw_async(&ssd->qxl, 0, 0,
+@@ -1079,10 +1067,6 @@ static void spice_gl_switch(DisplayChangeListener *dcl,
+ int width = 0, height = 0;
+
+ if (ssd->ds) {
+-#if defined(CONFIG_IOSURFACE)
+- // need to release texture from surface before destorying it
+- spice_iosurface_flush(ssd);
+-#endif
+ surface_gl_destroy_texture(ssd->gls, ssd->ds);
+ }
+ ssd->ds = new_surface;
+@@ -1346,7 +1330,6 @@ static void qemu_spice_gl_update(DisplayChangeListener *dcl,
+ GLuint tex_id = ssd->backing_borrow(ssd->backing_id, &y_0_top,
+ NULL, NULL);
+ spice_iosurface_blit(ssd, tex_id, !y_0_top, false);
+- spice_iosurface_flush(ssd);
+ //TODO: cursor stuff
+ #endif
+
+--
+2.28.0
+
+From 34b035535eee0f8497a1492ae1d9478dc9c7e7a0 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Sat, 24 Dec 2022 17:08:52 -0800
+Subject: [PATCH 2/3] spice-display: remove redundant glBindTexture
+
+---
+ ui/spice-display.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/ui/spice-display.c b/ui/spice-display.c
+index c1d2a66fc5..610edaa089 100644
+--- a/ui/spice-display.c
++++ b/ui/spice-display.c
+@@ -966,7 +966,6 @@ static void spice_iosurface_blit(SimpleSpiceDisplay *ssd, GLuint src_texture, bo
+
+ #if defined(CONFIG_ANGLE)
+ eglMakeCurrent(qemu_egl_display, ssd->esurface, ssd->esurface, spice_gl_ctx);
+- glBindTexture(ssd->iosurface_fb.texture_target, ssd->iosurface_fb.texture);
+ egl_texture_blit(ssd->gls, &ssd->iosurface_fb, &tmp_fb, flip, swap);
+ #endif
+ }
+--
+2.28.0
+
+From bbc1efd4ca66d0892f2bced95fc6150192585a12 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Sun, 25 Dec 2022 00:46:42 -0800
+Subject: [PATCH 3/3] Revert "virtio-blk: use BDRV_REQ_REGISTERED_BUF
+ optimization hint"
+
+This reverts commit baf422684d73c7bf38e2c18815e18d44fcf395b6.
+---
+ hw/block/virtio-blk.c | 39 ++++++++++++----------------------
+ include/hw/virtio/virtio-blk.h | 2 --
+ 2 files changed, 14 insertions(+), 27 deletions(-)
+
+diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
+index f717550fdc..8131ec2dbc 100644
+--- a/hw/block/virtio-blk.c
++++ b/hw/block/virtio-blk.c
+@@ -21,7 +21,6 @@
+ #include "hw/block/block.h"
+ #include "hw/qdev-properties.h"
+ #include "sysemu/blockdev.h"
+-#include "sysemu/block-ram-registrar.h"
+ #include "sysemu/sysemu.h"
+ #include "sysemu/runstate.h"
+ #include "hw/virtio/virtio-blk.h"
+@@ -363,14 +362,12 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+ }
+ }
+
+-static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
++static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
+ int start, int num_reqs, int niov)
+ {
+- BlockBackend *blk = s->blk;
+ QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
+ int64_t sector_num = mrb->reqs[start]->sector_num;
+ bool is_write = mrb->is_write;
+- BdrvRequestFlags flags = 0;
+
+ if (num_reqs > 1) {
+ int i;
+@@ -401,18 +398,12 @@ static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
+ num_reqs - 1);
+ }
+
+- if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
+- flags |= BDRV_REQ_REGISTERED_BUF;
+- }
+-
+ if (is_write) {
+- blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
+- flags, virtio_blk_rw_complete,
+- mrb->reqs[start]);
++ blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
++ virtio_blk_rw_complete, mrb->reqs[start]);
+ } else {
+- blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
+- flags, virtio_blk_rw_complete,
+- mrb->reqs[start]);
++ blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
++ virtio_blk_rw_complete, mrb->reqs[start]);
+ }
+ }
+
+@@ -434,14 +425,14 @@ static int multireq_compare(const void *a, const void *b)
+ }
+ }
+
+-static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
++static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
+ {
+ int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
+ uint32_t max_transfer;
+ int64_t sector_num = 0;
+
+ if (mrb->num_reqs == 1) {
+- submit_requests(s, mrb, 0, 1, -1);
++ submit_requests(blk, mrb, 0, 1, -1);
+ mrb->num_reqs = 0;
+ return;
+ }
+@@ -461,11 +452,11 @@ static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
+ * 3. merge would exceed maximum transfer length of backend device
+ */
+ if (sector_num + nb_sectors != req->sector_num ||
+- niov > blk_get_max_iov(s->blk) - req->qiov.niov ||
++ niov > blk_get_max_iov(blk) - req->qiov.niov ||
+ req->qiov.size > max_transfer ||
+ nb_sectors > (max_transfer -
+ req->qiov.size) / BDRV_SECTOR_SIZE) {
+- submit_requests(s, mrb, start, num_reqs, niov);
++ submit_requests(blk, mrb, start, num_reqs, niov);
+ num_reqs = 0;
+ }
+ }
+@@ -481,7 +472,7 @@ static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
+ num_reqs++;
+ }
+
+- submit_requests(s, mrb, start, num_reqs, niov);
++ submit_requests(blk, mrb, start, num_reqs, niov);
+ mrb->num_reqs = 0;
+ }
+
+@@ -496,7 +487,7 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
+ * Make sure all outstanding writes are posted to the backing device.
+ */
+ if (mrb->is_write && mrb->num_reqs > 0) {
+- virtio_blk_submit_multireq(s, mrb);
++ virtio_blk_submit_multireq(s->blk, mrb);
+ }
+ blk_aio_flush(s->blk, virtio_blk_flush_complete, req);
+ }
+@@ -676,7 +667,7 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
+ if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
+ is_write != mrb->is_write ||
+ !s->conf.request_merging)) {
+- virtio_blk_submit_multireq(s, mrb);
++ virtio_blk_submit_multireq(s->blk, mrb);
+ }
+
+ assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
+@@ -783,7 +774,7 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
+ } while (!virtio_queue_empty(vq));
+
+ if (mrb.num_reqs) {
+- virtio_blk_submit_multireq(s, &mrb);
++ virtio_blk_submit_multireq(s->blk, &mrb);
+ }
+
+ blk_io_unplug(s->blk);
+@@ -832,7 +823,7 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh)
+ }
+
+ if (mrb.num_reqs) {
+- virtio_blk_submit_multireq(s, &mrb);
++ virtio_blk_submit_multireq(s->blk, &mrb);
+ }
+ if (is_bh) {
+ blk_dec_in_flight(s->conf.conf.blk);
+@@ -1214,7 +1205,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
+ }
+
+ s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
+- blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
+ blk_set_dev_ops(s->blk, &virtio_block_ops, s);
+
+ blk_iostatus_enable(s->blk);
+@@ -1240,7 +1230,6 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
+ virtio_del_queue(vdev, i);
+ }
+ qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
+- blk_ram_registrar_destroy(&s->blk_ram_registrar);
+ qemu_del_vm_change_state_handler(s->change);
+ blockdev_mark_auto_del(s->blk);
+ virtio_cleanup(vdev);
+diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
+index 7f589b4146..d311c57cca 100644
+--- a/include/hw/virtio/virtio-blk.h
++++ b/include/hw/virtio/virtio-blk.h
+@@ -19,7 +19,6 @@
+ #include "hw/block/block.h"
+ #include "sysemu/iothread.h"
+ #include "sysemu/block-backend.h"
+-#include "sysemu/block-ram-registrar.h"
+ #include "qom/object.h"
+
+ #define TYPE_VIRTIO_BLK "virtio-blk-device"
+@@ -65,7 +64,6 @@ struct VirtIOBlock {
+ struct VirtIOBlockDataPlane *dataplane;
+ uint64_t host_features;
+ size_t config_size;
+- BlockRAMRegistrar blk_ram_registrar;
+ };
+
+ typedef struct VirtIOBlockReq {
+--
+2.28.0
+
+From 531da34587b38c64787cb25b1de1c5d13f75def8 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Wed, 28 Dec 2022 16:50:49 -0800
+Subject: [PATCH] hvf: support TSO mode (private feature)
+
+Apple Silicon supports TSO mode which can be used for emulating strong
+memory ordering in the guest. This feature requires the private entitlement
+`com.apple.private.hypervisor` as well as a private function to modify
+ACTLR_EL1 not exposed by the public Hypervisor framework.
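+
+With this change the feature is surfaced as a boolean property on the
+accelerator object, so it should be selectable roughly as follows (an
+assumed invocation; the exact machine and device flags depend on the
+setup):
+
+    qemu-system-aarch64 -accel hvf,tso=on ...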
+---
+ accel/hvf/hvf-accel-ops.c | 51 ++++++++++++++++++++++++++---------
+ include/sysemu/hvf_int.h | 13 +++++++++
+ meson.build | 1 +
+ meson_options.txt | 2 ++
+ scripts/meson-buildoptions.sh | 3 +++
+ target/arm/hvf/hvf.c | 28 +++++++++++++++++++
+ target/i386/hvf/hvf.c | 5 ++++
+ 7 files changed, 90 insertions(+), 13 deletions(-)
+
+diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
+index 24913ca9c4..b414e240ec 100644
+--- a/accel/hvf/hvf-accel-ops.c
++++ b/accel/hvf/hvf-accel-ops.c
+@@ -57,13 +57,10 @@
+ #include "sysemu/hvf_int.h"
+ #include "sysemu/runstate.h"
+ #include "qemu/guest-random.h"
++#include "hw/boards.h"
+
+ HVFState *hvf_state;
+
+-#ifdef __aarch64__
+-#define HV_VM_DEFAULT NULL
+-#endif
+-
+ /* Memory slots */
+
+ hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
+@@ -319,25 +316,44 @@ bool hvf_allowed;
+
+ static int hvf_accel_init(MachineState *ms)
+ {
+- int x;
+ hv_return_t ret;
+- HVFState *s;
++ HVFState *s = HVF_STATE(ms->accelerator);
+
+- ret = hv_vm_create(HV_VM_DEFAULT);
++ ret = hvf_arch_vm_create(s);
+ assert_hvf_ok(ret);
+
+- s = g_new0(HVFState, 1);
++ hvf_state = s;
++ memory_listener_register(&hvf_memory_listener, &address_space_memory);
++
++ return hvf_arch_init();
++}
++
++#if defined(CONFIG_HVF_PRIVATE) && defined(__aarch64__)
++
++static bool hvf_get_tso(Object *obj, Error **errp)
++{
++ HVFState *s = HVF_STATE(obj);
++ return s->tso_mode;
++}
++
++static void hvf_set_tso(Object *obj, bool value, Error **errp)
++{
++ HVFState *s = HVF_STATE(obj);
++ s->tso_mode = value;
++}
++
++#endif
++
++static void hvf_accel_instance_init(Object *obj)
++{
++ int x;
++ HVFState *s = HVF_STATE(obj);
+
+ s->num_slots = ARRAY_SIZE(s->slots);
+ for (x = 0; x < s->num_slots; ++x) {
+ s->slots[x].size = 0;
+ s->slots[x].slot_id = x;
+ }
+-
+- hvf_state = s;
+- memory_listener_register(&hvf_memory_listener, &address_space_memory);
+-
+- return hvf_arch_init();
+ }
+
+ static void hvf_accel_class_init(ObjectClass *oc, void *data)
+@@ -346,12 +362,21 @@ static void hvf_accel_class_init(ObjectClass *oc, void *data)
+ ac->name = "HVF";
+ ac->init_machine = hvf_accel_init;
+ ac->allowed = &hvf_allowed;
++
++#if defined(CONFIG_HVF_PRIVATE) && defined(__aarch64__)
++ object_class_property_add_bool(oc, "tso",
++ hvf_get_tso, hvf_set_tso);
++ object_class_property_set_description(oc, "tso",
++ "Set on/off to enable/disable total store ordering mode");
++#endif
+ }
+
+ static const TypeInfo hvf_accel_type = {
+ .name = TYPE_HVF_ACCEL,
+ .parent = TYPE_ACCEL,
++ .instance_init = hvf_accel_instance_init,
+ .class_init = hvf_accel_class_init,
++ .instance_size = sizeof(HVFState),
+ };
+
+ static void hvf_type_init(void)
+diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
+index 6545f7cd61..9f550b9f8b 100644
+--- a/include/sysemu/hvf_int.h
++++ b/include/sysemu/hvf_int.h
+@@ -17,6 +17,15 @@
+ #include <Hypervisor/hv.h>
+ #endif
+
++#if defined(CONFIG_HVF_PRIVATE) && defined(__aarch64__)
++extern hv_return_t _hv_vm_config_set_isa(hv_vm_config_t config, uint32_t isa);
++extern hv_return_t _hv_vcpu_get_actlr(hv_vcpu_t vcpu, uint64_t* value);
++extern hv_return_t _hv_vcpu_set_actlr(hv_vcpu_t vcpu, uint64_t value);
++
++#define HV_VM_CONFIG_ISA_PRIVATE (3)
++#define ACTLR_EL1_TSO_ENABLE_MASK ((1 << 1) | (1 << 9))
++#endif
++
+ /* hvf_slot flags */
+ #define HVF_SLOT_LOG (1 << 0)
+
+@@ -45,6 +54,9 @@ struct HVFState {
+
+ hvf_vcpu_caps *hvf_caps;
+ uint64_t vtimer_offset;
++#if defined(CONFIG_HVF_PRIVATE) && defined(__aarch64__)
++ bool tso_mode;
++#endif
+ };
+ extern HVFState *hvf_state;
+
+@@ -56,6 +68,7 @@ struct hvf_vcpu_state {
+ };
+
+ void assert_hvf_ok(hv_return_t ret);
++hv_return_t hvf_arch_vm_create(HVFState *s);
+ int hvf_arch_init(void);
+ int hvf_arch_init_vcpu(CPUState *cpu);
+ void hvf_arch_vcpu_destroy(CPUState *cpu);
+diff --git a/meson.build b/meson.build
+index 00fccfc676..ab6a60d1a8 100644
+--- a/meson.build
++++ b/meson.build
+@@ -440,6 +440,7 @@ if get_option('hvf').allowed()
+ required: get_option('hvf'))
+ if hvf.found()
+ accelerators += 'CONFIG_HVF'
++ config_host_data.set('CONFIG_HVF_PRIVATE', get_option('hvf_private'))
+ endif
+ endif
+ if get_option('hax').allowed()
+diff --git a/meson_options.txt b/meson_options.txt
+index 43916078c8..8415d45071 100644
+--- a/meson_options.txt
++++ b/meson_options.txt
+@@ -72,6 +72,8 @@ option('whpx', type: 'feature', value: 'auto',
+ description: 'WHPX acceleration support')
+ option('hvf', type: 'feature', value: 'auto',
+ description: 'HVF acceleration support')
++option('hvf_private', type: 'boolean', value: 'false',
++ description: 'HVF private features (entitlements required)')
+ option('nvmm', type: 'feature', value: 'auto',
+ description: 'NVMM acceleration support')
+ option('xen', type: 'feature', value: 'auto',
+diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
+index 2496991056..010515ac98 100644
+--- a/scripts/meson-buildoptions.sh
++++ b/scripts/meson-buildoptions.sh
+@@ -26,6 +26,7 @@ meson_options_help() {
+ printf "%s\n" ' --enable-fuzzing build fuzzing targets'
+ printf "%s\n" ' --enable-gcov Enable coverage tracking.'
+ printf "%s\n" ' --enable-gprof QEMU profiling with gprof'
++ printf "%s\n" ' --enable-hvf-private HVF private features (entitlements required)'
+ printf "%s\n" ' --enable-lto Use link time optimization'
+ printf "%s\n" ' --enable-malloc=CHOICE choose memory allocator to use [system] (choices:'
+ printf "%s\n" ' jemalloc/system/tcmalloc)'
+@@ -289,6 +290,8 @@ _meson_option_parse() {
+ --disable-hax) printf "%s" -Dhax=disabled ;;
+ --enable-hvf) printf "%s" -Dhvf=enabled ;;
+ --disable-hvf) printf "%s" -Dhvf=disabled ;;
++ --enable-hvf-private) printf "%s" -Dhvf_private=true ;;
++ --disable-hvf-private) printf "%s" -Dhvf_private=false ;;
+ --iasl=*) quote_sh "-Diasl=$2" ;;
+ --enable-iconv) printf "%s" -Diconv=enabled ;;
+ --disable-iconv) printf "%s" -Diconv=disabled ;;
+diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
+index 2c0323fe7f..bb7a4d5004 100644
+--- a/target/arm/hvf/hvf.c
++++ b/target/arm/hvf/hvf.c
+@@ -623,6 +623,18 @@ int hvf_arch_init_vcpu(CPUState *cpu)
+ &arm_cpu->isar.id_aa64mmfr0);
+ assert_hvf_ok(ret);
+
++#if defined(CONFIG_HVF_PRIVATE)
++ /* enable TSO mode */
++ if (hvf_state->tso_mode) {
++ uint64_t actlr;
++ ret = _hv_vcpu_get_actlr(cpu->hvf->fd, &actlr);
++ assert_hvf_ok(ret);
++ actlr |= ACTLR_EL1_TSO_ENABLE_MASK;
++ ret = _hv_vcpu_set_actlr(cpu->hvf->fd, actlr);
++ assert_hvf_ok(ret);
++ }
++#endif
++
+ return 0;
+ }
+
+@@ -1343,6 +1355,22 @@ static void hvf_vm_state_change(void *opaque, bool running, RunState state)
+ }
+ }
+
++hv_return_t hvf_arch_vm_create(HVFState *s)
++{
++#if defined(CONFIG_HVF_PRIVATE)
++ hv_return_t ret;
++ hv_vm_config_t config = hv_vm_config_create();
++ if (s->tso_mode) {
++ _hv_vm_config_set_isa(config, HV_VM_CONFIG_ISA_PRIVATE);
++ }
++ ret = hv_vm_create(config);
++ os_release(config);
++ return ret;
++#else
++ return hv_vm_create(NULL);
++#endif
++}
++
+ int hvf_arch_init(void)
+ {
+ hvf_state->vtimer_offset = mach_absolute_time();
+diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
+index 8d2248bb3f..8283a9b761 100644
+--- a/target/i386/hvf/hvf.c
++++ b/target/i386/hvf/hvf.c
+@@ -212,6 +212,11 @@ void hvf_kick_vcpu_thread(CPUState *cpu)
+ cpus_kick_thread(cpu);
+ }
+
++hv_return_t hvf_arch_vm_create(HVFState *s)
++{
++ return hv_vm_create(HV_VM_DEFAULT);
++}
++
+ int hvf_arch_init(void)
+ {
+ return 0;
+--
+2.28.0
+
+From c874e68e5a1635326f8a2f52320b8dbe82f6be51 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Fri, 30 Dec 2022 20:24:00 -0800
+Subject: [PATCH] tcti: disable TARGET_TB_PCREL for TCTI
+
+It is currently not supported.
+---
+ target/arm/cpu-param.h | 2 ++
+ target/i386/cpu-param.h | 2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
+index 53cac9c89b..6c4af8f0d2 100644
+--- a/target/arm/cpu-param.h
++++ b/target/arm/cpu-param.h
+@@ -31,7 +31,9 @@
+ # define TARGET_PAGE_BITS_VARY
+ # define TARGET_PAGE_BITS_MIN 10
+
++#ifndef CONFIG_TCG_THREADED_INTERPRETER
+ # define TARGET_TB_PCREL 1
++#endif
+
+ /*
+ * Cache the attrs and shareability fields from the page table entry.
+diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h
+index f579b16bd2..0975265ff3 100644
+--- a/target/i386/cpu-param.h
++++ b/target/i386/cpu-param.h
+@@ -25,7 +25,7 @@
+ #define TARGET_PAGE_BITS 12
+ #define NB_MMU_MODES 5
+
+-#ifndef CONFIG_USER_ONLY
++#if !defined(CONFIG_USER_ONLY) && !defined(CONFIG_TCG_THREADED_INTERPRETER)
+ # define TARGET_TB_PCREL 1
+ #endif
+
+--
+2.28.0
+
+From 80c0e3099fe82b61e7a094f9f24a3c4aa030d5f3 Mon Sep 17 00:00:00 2001
+From: osy <50960678+osy@users.noreply.github.com>
+Date: Sun, 1 Jan 2023 16:51:56 -0800
+Subject: [PATCH] vmnet: stop receiving events when VM is stopped
+
+When the VM is stopped with the HMP command "stop", the handler soon
+stops reading from the vmnet interface. This causes a flood of
+`VMNET_INTERFACE_PACKETS_AVAILABLE` events which puts the host CPU at
+100%. Fix this by removing the event handler from vmnet while the VM is
+not in a running state and restoring it once the VM returns to a running
+state.
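+
+The shape of the fix, condensed from the hunks below: one callback both
+installs and removes the vmnet event handler, and it is also registered
+as a VM state change handler so it runs on every stop/continue
+transition:
+
+    static void vmnet_vm_state_change_cb(void *opaque, bool running,
+                                         RunState state)
+    {
+        VmnetState *s = opaque;
+        if (running) {
+            /* re-register the PACKETS_AVAILABLE callback (see hunk) */
+        } else {
+            /* unregister it so a stopped VM sees no event flood */
+            vmnet_interface_set_event_callback(s->vmnet_if,
+                VMNET_INTERFACE_PACKETS_AVAILABLE, NULL, NULL);
+        }
+    }
+    ...
+    s->change = qemu_add_vm_change_state_handler(vmnet_vm_state_change_cb, s);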
+---
+ net/vmnet-common.m | 48 +++++++++++++++++++++++++++++++++-------------
+ net/vmnet_int.h | 2 ++
+ 2 files changed, 37 insertions(+), 13 deletions(-)
+
+diff --git a/net/vmnet-common.m b/net/vmnet-common.m
+index 2cb60b9ddd..2958283485 100644
+--- a/net/vmnet-common.m
++++ b/net/vmnet-common.m
+@@ -17,6 +17,7 @@
+ #include "clients.h"
+ #include "qemu/error-report.h"
+ #include "qapi/error.h"
++#include "sysemu/runstate.h"
+
+ #include <vmnet/vmnet.h>
+ #include <dispatch/dispatch.h>
+@@ -242,6 +243,35 @@ static void vmnet_bufs_init(VmnetState *s)
+ }
+ }
+
++/**
++ * Called on state change to un-register/re-register handlers
++ */
++static void vmnet_vm_state_change_cb(void *opaque, bool running, RunState state)
++{
++ VmnetState *s = opaque;
++
++ if (running) {
++ vmnet_interface_set_event_callback(
++ s->vmnet_if,
++ VMNET_INTERFACE_PACKETS_AVAILABLE,
++ s->if_queue,
++ ^(interface_event_t event_id, xpc_object_t event) {
++ assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
++ /*
++ * This function is being called from a non qemu thread, so
++ * we only schedule a BH, and do the rest of the io completion
++ * handling from vmnet_send_bh() which runs in a qemu context.
++ */
++ qemu_bh_schedule(s->send_bh);
++ });
++ } else {
++ vmnet_interface_set_event_callback(
++ s->vmnet_if,
++ VMNET_INTERFACE_PACKETS_AVAILABLE,
++ NULL,
++ NULL);
++ }
++}
+
+ int vmnet_if_create(NetClientState *nc,
+ xpc_object_t if_desc,
+@@ -329,19 +359,9 @@ int vmnet_if_create(NetClientState *nc,
+ s->packets_send_current_pos = 0;
+ s->packets_send_end_pos = 0;
+
+- vmnet_interface_set_event_callback(
+- s->vmnet_if,
+- VMNET_INTERFACE_PACKETS_AVAILABLE,
+- s->if_queue,
+- ^(interface_event_t event_id, xpc_object_t event) {
+- assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
+- /*
+- * This function is being called from a non qemu thread, so
+- * we only schedule a BH, and do the rest of the io completion
+- * handling from vmnet_send_bh() which runs in a qemu context.
+- */
+- qemu_bh_schedule(s->send_bh);
+- });
++ vmnet_vm_state_change_cb(s, 1, RUN_STATE_RUNNING);
++
++ s->change = qemu_add_vm_change_state_handler(vmnet_vm_state_change_cb, s);
+
+ return 0;
+ }
+@@ -356,6 +376,8 @@ void vmnet_cleanup_common(NetClientState *nc)
+ return;
+ }
+
++ vmnet_vm_state_change_cb(s, 0, RUN_STATE_SHUTDOWN);
++ qemu_del_vm_change_state_handler(s->change);
+ if_stopped_sem = dispatch_semaphore_create(0);
+ vmnet_stop_interface(
+ s->vmnet_if,
+diff --git a/net/vmnet_int.h b/net/vmnet_int.h
+index adf6e8c20d..ffba92108f 100644
+--- a/net/vmnet_int.h
++++ b/net/vmnet_int.h
+@@ -46,6 +46,8 @@ typedef struct VmnetState {
+ int packets_send_end_pos;
+
+ struct iovec iov_buf[VMNET_PACKETS_LIMIT];
++
++ VMChangeStateEntry *change;
+ } VmnetState;
+
+ const char *vmnet_status_map_str(vmnet_return_t status);
+--
+2.28.0
+
+From bd2fc471e7a0b1e8e700b0be8e5ae08fe2fd5b9b Mon Sep 17 00:00:00 2001
+From: osy <osy@turing.llc>
+Date: Mon, 6 Mar 2023 15:23:31 -0800
+Subject: [PATCH] Merge branch 'with_tcti_vectors' into utm-edition
+
+---
+ block/file-posix.c | 21 +
+ include/qemu/osdep.h | 2 +-
+ meson.build | 87 ++-
+ tcg/aarch64-tcti/tcg-target-con-set.h | 23 +-
+ tcg/aarch64-tcti/tcg-target-con-str.h | 11 +-
+ tcg/aarch64-tcti/tcg-target.c.inc | 1274 ++++++++++++++++++++++++++-------
+ tcg/aarch64-tcti/tcg-target.h | 163 +++--
+ tcg/aarch64-tcti/tcg-target.opc.h | 14 +
+ tcg/aarch64-tcti/tcti-gadget-gen.py | 613 +++++++++++++---
+ util/osdep.c | 6 +
+ 10 files changed, 1775 insertions(+), 439 deletions(-)
+
+diff --git a/block/file-posix.c b/block/file-posix.c
+index 9f6e6279d9..766bbb6cb5 100644
+--- a/block/file-posix.c
++++ b/block/file-posix.c
+@@ -280,6 +280,13 @@ static int raw_normalize_devicepath(const char **filename, Error **errp)
+ }
+ #endif
+
++#if defined(CONFIG_IOS)
++static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
++{
++ return -ENOTSUP; /* not supported on iOS */
++}
++#else /* CONFIG_IOS */
++
+ /*
+ * Get logical block size via ioctl. On success store it in @sector_size_p.
+ */
+@@ -313,6 +320,8 @@ static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
+ return success ? 0 : -errno;
+ }
+
++#endif
++
+ /**
+ * Get physical block size of @fd.
+ * On success, store it in @blk_size and return 0.
+@@ -1449,12 +1458,24 @@ static bool preadv_present = true;
+ static ssize_t
+ qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+ {
++#ifdef CONFIG_DARWIN /* preadv introduced in macOS 11 */
++ if (!__builtin_available(macOS 11, iOS 14, watchOS 7, tvOS 14, *)) {
++ preadv_present = false;
++ return -ENOSYS;
++ } else
++#endif
+ return preadv(fd, iov, nr_iov, offset);
+ }
+
+ static ssize_t
+ qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+ {
++#ifdef CONFIG_DARWIN /* pwritev introduced in macOS 11 */
++ if (!__builtin_available(macOS 11, iOS 14, watchOS 7, tvOS 14, *)) {
++ preadv_present = false;
++ return -ENOSYS;
++ } else
++#endif
+ return pwritev(fd, iov, nr_iov, offset);
+ }
+
+diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
+index 8dbf741ee4..97bc3ceac3 100644
+--- a/include/qemu/osdep.h
++++ b/include/qemu/osdep.h
+@@ -676,7 +676,7 @@ size_t qemu_get_host_physmem(void);
+ /**
+ * Platforms which do not support system() return ENOSYS
+ */
+-#ifndef HAVE_SYSTEM_FUNCTION
++#if !defined(HAVE_SYSTEM_FUNCTION) || defined(CONFIG_IOS)
+ #define system platform_does_not_support_system
+ static inline int platform_does_not_support_system(const char *command)
+ {
+diff --git a/meson.build b/meson.build
+index ab6a60d1a8..5fd46123de 100644
+--- a/meson.build
++++ b/meson.build
+@@ -294,6 +294,7 @@ add_project_arguments('-iquote', '.',
+
+ if host_machine.system() == 'darwin'
+ add_languages('objc', required: false, native: false)
++ add_project_link_arguments(['-fvisibility-inlines-hidden', '-Xlinker', '-no_deduplicate'], native: false, language: ['c', 'cpp', 'objc'])
+ endif
+
+ sparse = find_program('cgcc', required: get_option('sparse'))
+@@ -455,6 +456,8 @@ if targetos == 'netbsd'
+ endif
+ endif
+
++tcti_gadgets = files()
++
+ tcg_arch = host_arch
+ if get_option('tcg').allowed()
+ if host_arch == 'unknown'
+@@ -483,14 +486,77 @@ if get_option('tcg').allowed()
+
+ # Tell our compiler how to generate our TCTI gadgets.
+ gadget_generator = 'tcg/@0@/tcti-gadget-gen.py'.format(tcg_arch)
+- tcti_gadgets = custom_target('tcti-gadgets.c.inc',
+- output: 'tcti-gadgets.c.inc',
+- input: gadget_generator,
+- command: [find_program(gadget_generator), '@OUTPUT@'],
+- build_by_default: true,
+- build_always_stale: false)
+-
+- genh += tcti_gadgets
++ tcti_sources = [
++ 'tcti_gadgets.h',
++ 'tcti_misc_gadgets.c',
++ 'tcti_misc_gadgets.h',
++ 'tcti_setcond_gadgets.c',
++ 'tcti_setcond_gadgets.h',
++ 'tcti_brcond_gadgets.c',
++ 'tcti_brcond_gadgets.h',
++ 'tcti_mov_gadgets.c',
++ 'tcti_mov_gadgets.h',
++ 'tcti_load_signed_gadgets.c',
++ 'tcti_load_signed_gadgets.h',
++ 'tcti_load_unsigned_gadgets.c',
++ 'tcti_load_unsigned_gadgets.h',
++ 'tcti_store_gadgets.c',
++ 'tcti_store_gadgets.h',
++ 'tcti_arithmetic_gadgets.c',
++ 'tcti_arithmetic_gadgets.h',
++ 'tcti_logical_gadgets.c',
++ 'tcti_logical_gadgets.h',
++ 'tcti_extension_gadgets.c',
++ 'tcti_extension_gadgets.h',
++ 'tcti_bitwise_gadgets.c',
++ 'tcti_bitwise_gadgets.h',
++ 'tcti_byteswap_gadgets.c',
++ 'tcti_byteswap_gadgets.h',
++ 'tcti_qemu_ld_aligned_signed_le_gadgets.c',
++ 'tcti_qemu_ld_aligned_signed_le_gadgets.h',
++ 'tcti_qemu_ld_unaligned_signed_le_gadgets.c',
++ 'tcti_qemu_ld_unaligned_signed_le_gadgets.h',
++ 'tcti_qemu_ld_slowpath_signed_le_gadgets.c',
++ 'tcti_qemu_ld_slowpath_signed_le_gadgets.h',
++ 'tcti_qemu_ld_aligned_unsigned_le_gadgets.c',
++ 'tcti_qemu_ld_aligned_unsigned_le_gadgets.h',
++ 'tcti_qemu_ld_unaligned_unsigned_le_gadgets.c',
++ 'tcti_qemu_ld_unaligned_unsigned_le_gadgets.h',
++ 'tcti_qemu_ld_slowpath_unsigned_le_gadgets.c',
++ 'tcti_qemu_ld_slowpath_unsigned_le_gadgets.h',
++ 'tcti_qemu_ld_aligned_be_gadgets.c',
++ 'tcti_qemu_ld_aligned_be_gadgets.h',
++ 'tcti_qemu_ld_unaligned_be_gadgets.c',
++ 'tcti_qemu_ld_unaligned_be_gadgets.h',
++ 'tcti_qemu_ld_slowpath_be_gadgets.c',
++ 'tcti_qemu_ld_slowpath_be_gadgets.h',
++ 'tcti_qemu_st_aligned_le_gadgets.c',
++ 'tcti_qemu_st_aligned_le_gadgets.h',
++ 'tcti_qemu_st_unaligned_le_gadgets.c',
++ 'tcti_qemu_st_unaligned_le_gadgets.h',
++ 'tcti_qemu_st_slowpath_le_gadgets.c',
++ 'tcti_qemu_st_slowpath_le_gadgets.h',
++ 'tcti_qemu_st_aligned_be_gadgets.c',
++ 'tcti_qemu_st_aligned_be_gadgets.h',
++ 'tcti_qemu_st_unaligned_be_gadgets.c',
++ 'tcti_qemu_st_unaligned_be_gadgets.h',
++ 'tcti_qemu_st_slowpath_be_gadgets.c',
++ 'tcti_qemu_st_slowpath_be_gadgets.h',
++ 'tcti_simd_base_gadgets.c',
++ 'tcti_simd_base_gadgets.h',
++ 'tcti_simd_arithmetic_gadgets.c',
++ 'tcti_simd_arithmetic_gadgets.h',
++ 'tcti_simd_logical_gadgets.c',
++ 'tcti_simd_logical_gadgets.h',
++ 'tcti_simd_immediate_gadgets.c',
++ 'tcti_simd_immediate_gadgets.h',
++ ]
++ tcti_gadgets = custom_target('tcti-gadgets.h',
++ output: tcti_sources,
++ input: gadget_generator,
++ command: [find_program(gadget_generator)],
++ build_by_default: true,
++ build_always_stale: false)
+ elif host_arch == 'x86_64'
+ tcg_arch = 'i386'
+ elif host_arch == 'ppc64'
+@@ -3157,6 +3223,11 @@ if get_option('b_lto')
+ endif
+ common_ss.add(pagevary)
+ specific_ss.add(files('page-vary.c'))
++specific_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tcg/tci.c'))
++
++# FIXME: This is being used for now for development quickness, but these really should be
++# added to a gadget-specific shared library (tcti_ss).
++specific_ss.add(when: 'CONFIG_TCG_THREADED_INTERPRETER', if_true: tcti_gadgets)
+
+ subdir('backends')
+ subdir('disas')
+diff --git a/tcg/aarch64-tcti/tcg-target-con-set.h b/tcg/aarch64-tcti/tcg-target-con-set.h
+index f51b7bcb13..a0b91bb320 100644
+--- a/tcg/aarch64-tcti/tcg-target-con-set.h
++++ b/tcg/aarch64-tcti/tcg-target-con-set.h
+@@ -9,13 +9,24 @@
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
++
++// Simple register functions.
++C_O0_I1(r)
+ C_O0_I2(r, r)
+ C_O0_I3(r, r, r)
+-C_O0_I4(r, r, r, r)
++//C_O0_I4(r, r, r, r)
+ C_O1_I1(r, r)
+-C_O1_I2(r, 0, r)
+ C_O1_I2(r, r, r)
+-C_O1_I4(r, r, r, r, r)
+-C_O2_I1(r, r, r)
+-C_O2_I2(r, r, r, r)
+-C_O2_I4(r, r, r, r, r, r)
++//C_O1_I4(r, r, r, r, r)
++//C_O2_I1(r, r, r)
++//C_O2_I2(r, r, r, r)
++//C_O2_I4(r, r, r, r, r, r)
++
++// Vector functions.
++C_O1_I1(w, w)
++C_O1_I1(w, r)
++C_O0_I2(w, r)
++C_O1_I1(w, wr)
++C_O1_I2(w, w, w)
++C_O1_I3(w, w, w, w)
++C_O1_I2(w, 0, w)
+\ No newline at end of file
+diff --git a/tcg/aarch64-tcti/tcg-target-con-str.h b/tcg/aarch64-tcti/tcg-target-con-str.h
+index 87c0f19e9c..94d06d3e74 100644
+--- a/tcg/aarch64-tcti/tcg-target-con-str.h
++++ b/tcg/aarch64-tcti/tcg-target-con-str.h
+@@ -8,4 +8,13 @@
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+-REGS('r', MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS))
++REGS('r', TCG_MASK_GP_REGISTERS)
++REGS('w', TCG_MASK_VECTOR_REGISTERS)
++
++/*
++ * Define constraint letters for constants:
++ * CONST(letter, TCG_CT_CONST_* bit set)
++ */
++
++// Simple 64-bit immediates.
++CONST('I', 0xFFFFFFFFFFFFFFFF)
+diff --git a/tcg/aarch64-tcti/tcg-target.c.inc b/tcg/aarch64-tcti/tcg-target.c.inc
+index af4cc8d664..10d6c4ec1b 100644
+--- a/tcg/aarch64-tcti/tcg-target.c.inc
++++ b/tcg/aarch64-tcti/tcg-target.c.inc
+@@ -22,13 +22,16 @@
+ * THE SOFTWARE.
+ */
+
++
++// Rich disassembly is nice in theory, but it's -slow-.
++//#define TCTI_GADGET_RICH_DISASSEMBLY
++
+ #define TCTI_GADGET_IMMEDIATE_ARRAY_LEN 64
+
+ #include "tcg/tcg-ldst.h"
+
+-// Grab our gadget definitions.
+-// FIXME: use the system path instead of hardcoding this?
+-#include "tcti-gadgets.c.inc"
++// Grab our gadget headers.
++#include "tcti_gadgets.h"
+
+ /* Marker for missing code. */
+ #define TODO() \
+@@ -47,64 +50,15 @@
+ # define tcti_assert(cond) ((void)0)
+ #endif
+
+-/* Bitfield n...m (in 32 bit value). */
+-#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
+-
+-/**
+- * Macro that defines a look-up tree for named QEMU_LD gadgets.
+- */
+-#define LD_MEMOP_LOOKUP(variable, arg, suffix) \
+- switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
+- case MO_UB: variable = gadget_qemu_ld_ub_ ## suffix; break; \
+- case MO_SB: variable = gadget_qemu_ld_sb_ ## suffix; break; \
+- case MO_LEUW: variable = gadget_qemu_ld_leuw_ ## suffix; break; \
+- case MO_LESW: variable = gadget_qemu_ld_lesw_ ## suffix; break; \
+- case MO_LEUL: variable = gadget_qemu_ld_leul_ ## suffix; break; \
+- case MO_LESL: variable = gadget_qemu_ld_lesl_ ## suffix; break; \
+- case MO_LEUQ: variable = gadget_qemu_ld_leq_ ## suffix; break; \
+- case MO_BEUW: variable = gadget_qemu_ld_beuw_ ## suffix; break; \
+- case MO_BESW: variable = gadget_qemu_ld_besw_ ## suffix; break; \
+- case MO_BEUL: variable = gadget_qemu_ld_beul_ ## suffix; break; \
+- case MO_BESL: variable = gadget_qemu_ld_besl_ ## suffix; break; \
+- case MO_BEUQ: variable = gadget_qemu_ld_beq_ ## suffix; break; \
+- default: \
+- g_assert_not_reached(); \
+- }
+-#define LD_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
+- if (a_bits >= s_bits) { \
+- LD_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix ); \
+- } else { \
+- LD_MEMOP_LOOKUP(gadget, arg, unaligned_ ## suffix); \
+- }
+-
+-
+-
+-/**
+- * Macro that defines a look-up tree for named QEMU_ST gadgets.
+- */
+-#define ST_MEMOP_LOOKUP(variable, arg, suffix) \
+- switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
+- case MO_UB: variable = gadget_qemu_st_ub_ ## suffix; break; \
+- case MO_LEUW: variable = gadget_qemu_st_leuw_ ## suffix; break; \
+- case MO_LEUL: variable = gadget_qemu_st_leul_ ## suffix; break; \
+- case MO_LEUQ: variable = gadget_qemu_st_leq_ ## suffix; break; \
+- case MO_BEUW: variable = gadget_qemu_st_beuw_ ## suffix; break; \
+- case MO_BEUL: variable = gadget_qemu_st_beul_ ## suffix; break; \
+- case MO_BEUQ: variable = gadget_qemu_st_beq_ ## suffix; break; \
+- default: \
+- g_assert_not_reached(); \
+- }
+-#define ST_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
+- if (a_bits >= s_bits) { \
+- ST_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix ); \
+- } else { \
+- ST_MEMOP_LOOKUP(gadget, arg, unaligned_ ## suffix); \
+- }
+
++/********************************
++ * TCG Constraints Definitions *
++ ********************************/
+
+ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+ {
+ switch (op) {
++
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+@@ -138,6 +92,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
++ case INDEX_op_extrl_i64_i32:
++ case INDEX_op_extrh_i64_i32:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+@@ -191,6 +147,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+ case INDEX_op_rotr_i64:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
++ case INDEX_op_clz_i32:
++ case INDEX_op_clz_i64:
++ case INDEX_op_ctz_i32:
++ case INDEX_op_ctz_i64:
+ return C_O1_I2(r, r, r);
+
+ case INDEX_op_brcond_i32:
+@@ -204,12 +164,65 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+ case INDEX_op_qemu_st_i64:
+ return C_O0_I3(r, r, r);
+
++ //
++ // Vector ops.
++ //
++ case INDEX_op_add_vec:
++ case INDEX_op_sub_vec:
++ case INDEX_op_mul_vec:
++ case INDEX_op_xor_vec:
++ case INDEX_op_ssadd_vec:
++ case INDEX_op_sssub_vec:
++ case INDEX_op_usadd_vec:
++ case INDEX_op_ussub_vec:
++ case INDEX_op_smax_vec:
++ case INDEX_op_smin_vec:
++ case INDEX_op_umax_vec:
++ case INDEX_op_umin_vec:
++ case INDEX_op_shlv_vec:
++ case INDEX_op_shrv_vec:
++ case INDEX_op_sarv_vec:
++ case INDEX_op_aa64_sshl_vec:
++ return C_O1_I2(w, w, w);
++ case INDEX_op_not_vec:
++ case INDEX_op_neg_vec:
++ case INDEX_op_abs_vec:
++ case INDEX_op_shli_vec:
++ case INDEX_op_shri_vec:
++ case INDEX_op_sari_vec:
++ return C_O1_I1(w, w);
++ case INDEX_op_ld_vec:
++ case INDEX_op_dupm_vec:
++ return C_O1_I1(w, r);
++ case INDEX_op_st_vec:
++ return C_O0_I2(w, r);
++ case INDEX_op_dup_vec:
++ return C_O1_I1(w, wr);
++ case INDEX_op_or_vec:
++ case INDEX_op_andc_vec:
++ return C_O1_I2(w, w, w);
++ case INDEX_op_and_vec:
++ case INDEX_op_orc_vec:
++ return C_O1_I2(w, w, w);
++ case INDEX_op_cmp_vec:
++ return C_O1_I2(w, w, w);
++ case INDEX_op_bitsel_vec:
++ return C_O1_I3(w, w, w, w);
++
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ static const int tcg_target_reg_alloc_order[] = {
++
++ // General purpose registers, in preference-of-allocation order.
++ TCG_REG_R8,
++ TCG_REG_R9,
++ TCG_REG_R10,
++ TCG_REG_R11,
++ TCG_REG_R12,
++ TCG_REG_R13,
+ TCG_REG_R0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+@@ -218,16 +231,15 @@ static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+- TCG_REG_R8,
+- TCG_REG_R9,
+- TCG_REG_R10,
+- TCG_REG_R11,
+- TCG_REG_R12,
+- TCG_REG_R13,
+- /*
+- TCG_REG_R14, // AREG0
+- TCG_REG_R15, // SP
+- */
++
++ // Note: we do not allocate R14 or R15, as they're used for our
++ // special-purpose values.
++
++ // We'll use the high 16 vector registers, avoiding the call-saved lower ones.
++ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
++ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
++ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
++ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+ };
+
+ #if MAX_OPC_PARAM_IARGS != 7
+@@ -248,7 +260,7 @@ static const int tcg_target_call_oarg_regs[] = {
+ };
+
+ #ifdef CONFIG_DEBUG_TCG
+-static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
++static const char *const tcg_target_reg_names[TCG_TARGET_GP_REGS] = {
+ "r00",
+ "r01",
+ "r02",
+@@ -268,6 +280,98 @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ };
+ #endif
+
++/*************************
++ * TCG Emitter Helpers *
++ *************************/
++
++/* Bitfield n...m (in 32 bit value). */
++#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
++
++/**
++ * Macro that defines a look-up tree for named QEMU_LD gadgets.
++ */
++#define LD_MEMOP_LOOKUP(variable, arg, suffix) \
++ switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
++ case MO_UB: variable = gadget_qemu_ld_ub_ ## suffix; break; \
++ case MO_SB: variable = gadget_qemu_ld_sb_ ## suffix; break; \
++ case MO_LEUW: variable = gadget_qemu_ld_leuw_ ## suffix; break; \
++ case MO_LESW: variable = gadget_qemu_ld_lesw_ ## suffix; break; \
++ case MO_LEUL: variable = gadget_qemu_ld_leul_ ## suffix; break; \
++ case MO_LESL: variable = gadget_qemu_ld_lesl_ ## suffix; break; \
++ case MO_LEUQ: variable = gadget_qemu_ld_leq_ ## suffix; break; \
++ case MO_BEUW: variable = gadget_qemu_ld_beuw_ ## suffix; break; \
++ case MO_BESW: variable = gadget_qemu_ld_besw_ ## suffix; break; \
++ case MO_BEUL: variable = gadget_qemu_ld_beul_ ## suffix; break; \
++ case MO_BESL: variable = gadget_qemu_ld_besl_ ## suffix; break; \
++ case MO_BEUQ: variable = gadget_qemu_ld_beq_ ## suffix; break; \
++ default: \
++ g_assert_not_reached(); \
++ }
++#define LD_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
++ if (a_bits >= s_bits) { \
++ LD_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix ); \
++ } else { \
++ LD_MEMOP_LOOKUP(gadget, arg, unaligned_ ## suffix); \
++ }
++
++
++
++/**
++ * Macro that defines a look-up tree for named QEMU_ST gadgets.
++ */
++#define ST_MEMOP_LOOKUP(variable, arg, suffix) \
++ switch (get_memop(arg) & (MO_BSWAP | MO_SSIZE)) { \
++ case MO_UB: variable = gadget_qemu_st_ub_ ## suffix; break; \
++ case MO_LEUW: variable = gadget_qemu_st_leuw_ ## suffix; break; \
++ case MO_LEUL: variable = gadget_qemu_st_leul_ ## suffix; break; \
++ case MO_LEUQ: variable = gadget_qemu_st_leq_ ## suffix; break; \
++ case MO_BEUW: variable = gadget_qemu_st_beuw_ ## suffix; break; \
++ case MO_BEUL: variable = gadget_qemu_st_beul_ ## suffix; break; \
++ case MO_BEUQ: variable = gadget_qemu_st_beq_ ## suffix; break; \
++ default: \
++ g_assert_not_reached(); \
++ }
++#define ST_MEMOP_HANDLER(variable, arg, suffix, a_bits, s_bits) \
++ if (a_bits >= s_bits) { \
++ ST_MEMOP_LOOKUP(variable, arg, aligned_ ## suffix ); \
++ } else { \
++ ST_MEMOP_LOOKUP(gadget, arg, unaligned_ ## suffix); \
++ }
++
++
++#define LOOKUP_SPECIAL_CASE_LDST_GADGET(arg, name, mode) \
++ switch(TLB_MASK_TABLE_OFS(get_mmuidx(arg))) { \
++ case -32: \
++ gadget = (a_bits >= s_bits) ? \
++ gadget_qemu_ ## name ## _aligned_ ## mode ## _off32_i64 : \
++ gadget_qemu_ ## name ## _unaligned_ ## mode ## _off32_i64; \
++ break; \
++ case -48: \
++ gadget = (a_bits >= s_bits) ? \
++ gadget_qemu_ ## name ## _aligned_ ## mode ## _off48_i64 : \
++ gadget_qemu_ ## name ## _unaligned_ ## mode ## _off48_i64; \
++ break; \
++ case -64: \
++ gadget = (a_bits >= s_bits) ? \
++ gadget_qemu_ ## name ## _aligned_ ## mode ## _off64_i64 : \
++ gadget_qemu_ ## name ## _unaligned_ ## mode ## _off64_i64; \
++ break; \
++ case -96: \
++ gadget = (a_bits >= s_bits) ? \
++ gadget_qemu_ ## name ## _aligned_ ## mode ## _off96_i64 : \
++ gadget_qemu_ ## name ## _unaligned_ ## mode ## _off96_i64; \
++ break; \
++ case -128: \
++ gadget = (a_bits >= s_bits) ? \
++ gadget_qemu_ ## name ## _aligned_ ## mode ## _off128_i64 : \
++ gadget_qemu_ ## name ## _unaligned_ ## mode ## _off128_i64; \
++ break;\
++ default: \
++ gadget = gadget_qemu_ ## name ## _slowpath_ ## mode ## _off0_i64; \
++ break; \
++ }
++
++
+ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+ {
+@@ -363,48 +467,51 @@ tcg_target_ulong helper_be_ldul_mmu_signed(CPUArchState *env, target_ulong addr,
+
+
+ /* Write gadget pointer. */
+-static void tcg_out_nullary_gadget(TCGContext *s, void *gadget)
++static void tcg_out_gadget(TCGContext *s, const void *gadget)
+ {
+ tcg_out_immediate(s, (tcg_target_ulong)gadget);
+ }
+
+ /* Write gadget pointer, plus 64b immediate. */
+-static void tcg_out_imm64_gadget(TCGContext *s, void *gadget, tcg_target_ulong immediate)
++static void tcg_out_imm64_gadget(TCGContext *s, const void *gadget, tcg_target_ulong immediate)
+ {
+- tcg_out_nullary_gadget(s, gadget);
++ tcg_out_gadget(s, gadget);
+ tcg_out64(s, immediate);
+ }
+
+
+ /* Write gadget pointer (one register). */
+-static void tcg_out_unary_gadget(TCGContext *s, void *gadget_base[TCG_TARGET_NB_REGS], unsigned reg0)
++static void tcg_out_unary_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS], unsigned reg0)
+ {
+- tcg_out_nullary_gadget(s, gadget_base[reg0]);
++ tcg_out_gadget(s, gadget_base[reg0]);
+ }
+
+
+ /* Write gadget pointer (two registers). */
+-static void tcg_out_binary_gadget(TCGContext *s, void *gadget_base[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS], unsigned reg0, unsigned reg1)
++static void tcg_out_binary_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS], unsigned reg0, unsigned reg1)
+ {
+- tcg_out_nullary_gadget(s, gadget_base[reg0][reg1]);
++ tcg_out_gadget(s, gadget_base[reg0][reg1]);
+ }
+
+
+ /* Write gadget pointer (three registers). */
+-static void tcg_out_ternary_gadget(TCGContext *s, void *gadget_base[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS], unsigned reg0, unsigned reg1, unsigned reg2)
++static void tcg_out_ternary_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS], unsigned reg0, unsigned reg1, unsigned reg2)
+ {
+- tcg_out_nullary_gadget(s, gadget_base[reg0][reg1][reg2]);
++ tcg_out_gadget(s, gadget_base[reg0][reg1][reg2]);
+ }
+
++/***************************
++ * TCG Scalar Operations *
++ ***************************/
+
+ /**
+ * Version of our LDST generator that defers to more optimized gadgets selectively.
+ */
+-static void tcg_out_ldst_gadget_inner(TCGContext *s,
+- void *gadget_base[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS],
+- void *gadget_pos_imm[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
+- void *gadget_shifted_imm[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
+- void *gadget_neg_imm[TCG_TARGET_NB_REGS][TCG_TARGET_NB_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
++static void tcg_out_ldst_gadget_inner(TCGContext *s,
++ const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS],
++ const void *gadget_pos_imm[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
++ const void *gadget_shifted_imm[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
++ const void *gadget_neg_imm[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN],
+ unsigned reg0, unsigned reg1, uint32_t offset)
+ {
+ int64_t extended_offset = (int32_t)offset;
+@@ -415,7 +522,7 @@ static void tcg_out_ldst_gadget_inner(TCGContext *s,
+
+ // We handle positive and negative gadgets separately, in order to allow for asymmetrical
+ // collections of pre-made gadgets.
+- if (!is_negative)
++ if (!is_negative)
+ {
+ uint64_t shifted_offset = (extended_offset >> 3);
+ bool aligned_to_8B = ((extended_offset & 0b111) == 0);
+@@ -425,23 +532,23 @@ static void tcg_out_ldst_gadget_inner(TCGContext *s,
+
+ // More optimal case: we have a gadget that directly encodes the argument.
+ if (have_optimized_gadget) {
+- tcg_out_nullary_gadget(s, gadget_pos_imm[reg0][reg1][extended_offset]);
++ tcg_out_gadget(s, gadget_pos_imm[reg0][reg1][extended_offset]);
+ return;
+- }
++ }
+
+ // Special case: it's frequent to have low-numbered positive offsets that are aligned
+ // to 16B boundaries
+ else if(aligned_to_8B && have_shifted_gadget) {
+- tcg_out_nullary_gadget(s, gadget_shifted_imm[reg0][reg1][shifted_offset]);
++ tcg_out_gadget(s, gadget_shifted_imm[reg0][reg1][shifted_offset]);
+ return;
+ }
+- }
++ }
+ else {
+ uint64_t negated_offset = -(extended_offset);
+
+ // More optimal case: we have a gadget that directly encodes the argument.
+ if (negated_offset < TCTI_GADGET_IMMEDIATE_ARRAY_LEN) {
+- tcg_out_nullary_gadget(s, gadget_neg_imm[reg0][reg1][negated_offset]);
++ tcg_out_gadget(s, gadget_neg_imm[reg0][reg1][negated_offset]);
+ return;
+ }
+ }
+@@ -473,40 +580,90 @@ static void tcti_out_label(TCGContext *s, TCGLabel *label)
+ }
+ }
+
+-/**
+- * Generate a register-to-register MOV.
+- */
++
++/* Register to register move using ORR (shifted register with no shift). */
++static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
++{
++ switch(ext) {
++ case TCG_TYPE_I32:
++ tcg_out_binary_gadget(s, gadget_mov_i32, rd, rm);
++ break;
++
++ case TCG_TYPE_I64:
++ tcg_out_binary_gadget(s, gadget_mov_i64, rd, rm);
++ break;
++
++ default:
++ g_assert_not_reached();
++
++ }
++}
++
++
+ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+ {
+- tcg_debug_assert(ret != arg);
++ TCGReg w_ret = (ret - TCG_REG_V16);
++ TCGReg w_arg = (arg - TCG_REG_V16);
+
+- if (type == TCG_TYPE_I32) {
+- tcg_out_binary_gadget(s, gadget_mov_i32, ret, arg);
+- } else {
+- tcg_out_binary_gadget(s, gadget_mov_i64, ret, arg);
++ if (ret == arg) {
++ return true;
+ }
+
++ switch (type) {
++ case TCG_TYPE_I32:
++ case TCG_TYPE_I64:
++
++ // If this is a GP to GP register mov, issue our standard MOV.
++ if (ret < 32 && arg < 32) {
++ tcg_out_movr(s, type, ret, arg);
++ break;
++ }
++ // If this is a vector register to GP, issue a UMOV.
++ else if (ret < 32) {
++ void *gadget = (type == TCG_TYPE_I32) ? gadget_umov_s0 : gadget_umov_d0;
++ tcg_out_binary_gadget(s, gadget, ret, w_arg);
++ break;
++ }
++
++ // If this is a GP to vector move, insert the value using INS.
|
|
|
++ void *gadget = (type == TCG_TYPE_I32) ? gadget_ins_s0 : gadget_ins_d0;
|
|
|
++ tcg_out_binary_gadget(s, gadget, w_ret, arg);
|
|
|
++ break;
|
|
|
++ }
|
|
|
++ /* FALLTHRU */
|
|
|
++
|
|
|
++ case TCG_TYPE_V64:
|
|
|
++ tcg_debug_assert(ret >= 32 && arg >= 32);
|
|
|
++ tcg_out_ternary_gadget(s, gadget_or_d, w_ret, w_arg, w_arg);
|
|
|
++ break;
|
|
|
++
|
|
|
++ case TCG_TYPE_V128:
|
|
|
++ tcg_debug_assert(ret >= 32 && arg >= 32);
|
|
|
++ tcg_out_ternary_gadget(s, gadget_or_q, w_ret, w_arg, w_arg);
|
|
|
++ break;
|
|
|
+
|
|
|
++ default:
|
|
|
++ g_assert_not_reached();
|
|
|
++ }
|
|
|
+ return true;
|
|
|
+ }
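// A rough restatement (a sketch, not the patch's own code) of the dispatch in
// tcg_out_mov, under the assumption the checks imply: general-purpose registers
// occupy enum values below 32, and the vector bank starts at TCG_REG_V16 == 32,
// hence the w_* renumbering into the vector gadget tables.
#include <stdbool.h>

enum mov_kind { MOV_GP_GP, MOV_VEC_TO_GP, MOV_GP_TO_VEC, MOV_VEC_VEC };

static enum mov_kind classify_mov(unsigned ret, unsigned arg)
{
    bool ret_gp = (ret < 32);
    bool arg_gp = (arg < 32);

    if (ret_gp && arg_gp) return MOV_GP_GP;      /* ORR-based gadget_mov_*         */
    if (ret_gp)           return MOV_VEC_TO_GP;  /* UMOV: gadget_umov_s0/_d0       */
    if (arg_gp)           return MOV_GP_TO_VEC;  /* INS:  gadget_ins_s0/_d0        */
    return MOV_VEC_VEC;                          /* ORR Vd, Vn, Vn: gadget_or_d/_q */
}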
+ 
+ 
++
+ static void tcg_out_movi_i32(TCGContext *s, TCGReg t0, tcg_target_long arg)
+ {
+     bool is_negative = (arg < 0);
+ 
+     // We handle positive and negative gadgets separately, in order to allow for asymmetrical
+     // collections of pre-made gadgets.
+-    if (!is_negative) 
++    if (!is_negative)
+     {
+         // More optimal case: we have a gadget that directly encodes the argument.
+         if (arg < ARRAY_SIZE(gadget_movi_imm_i32[t0])) {
+-            tcg_out_nullary_gadget(s, gadget_movi_imm_i32[t0][arg]);
++            tcg_out_gadget(s, gadget_movi_imm_i32[t0][arg]);
+             return;
+         }
+-    }
+-    else {
+-
+     }
+ 
+     // Emit the mov and its immediate.
+@@ -521,16 +678,13 @@ static void tcg_out_movi_i64(TCGContext *s, TCGReg t0, tcg_target_long arg)
+ 
+     // We handle positive and negative gadgets separately, in order to allow for asymmetrical
+     // collections of pre-made gadgets.
+-    if (!is_negative) 
++    if (!is_negative)
+     {
+         // More optimal case: we have a gadget that directly encodes the argument.
+         if (arg < ARRAY_SIZE(gadget_movi_imm_i64[t0])) {
+-            tcg_out_nullary_gadget(s, gadget_movi_imm_i64[t0][arg]);
++            tcg_out_gadget(s, gadget_movi_imm_i64[t0][arg]);
+             return;
+         }
+-    }
+-    else {
+-
+     }
+ 
+     // TODO: optimize the negative case, too?
+@@ -558,7 +712,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg t0, tcg_target_long
+  */
+ static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
+ {
+-    tcg_out_nullary_gadget(s, gadget_call);
++    tcg_out_gadget(s, gadget_call);
+     tcg_out64(s, (uintptr_t)arg);
+ }
+ 
+@@ -570,9 +724,9 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
+ {
+ 
+     if (type == TCG_TYPE_I32) {
+-        tcg_out_ldst_gadget(s, gadget_ld32u, ret, arg1, arg2);
++        tcg_out_ldst_gadget(s, gadget_ld32u, ret, arg1, arg2);
+     } else {
+-        tcg_out_ldst_gadget(s, gadget_ld_i64, ret, arg1, arg2);
++        tcg_out_ldst_gadget(s, gadget_ld_i64, ret, arg1, arg2);
+     }
+ }
+ 
+@@ -598,7 +752,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+     // to patch our gadget stream with the target address, later.
+     if (s->tb_jmp_insn_offset) {
+         // Emit our gadget.
+-        tcg_out_nullary_gadget(s, gadget_br);
++        tcg_out_gadget(s, gadget_br);
+ 
+         // Place our current instruction into our "relocation table", so it can
+         // be patched once we know where the branch will target...
+@@ -617,7 +771,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+     // Simple branch.
+     case INDEX_op_br:
+-        tcg_out_nullary_gadget(s, gadget_br);
++        tcg_out_gadget(s, gadget_br);
+         tcti_out_label(s, arg_label(args[0]));
+         break;
+ 
+@@ -678,41 +832,41 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+     case INDEX_op_ld8u_i32:
+     case INDEX_op_ld8u_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld8u, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_ld8u, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld8s_i32:
+-        tcg_out_ldst_gadget(s, gadget_ld8s_i32, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_ld8s_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld8s_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld8s_i64, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_ld8s_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld16u_i32:
+     case INDEX_op_ld16u_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld16u, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_ld16u, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld16s_i32:
+-        tcg_out_ldst_gadget(s, gadget_ld16s_i32, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_ld16s_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld16s_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld16s_i64, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_ld16s_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld_i32:
+     case INDEX_op_ld32u_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld32u, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_ld32u, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_ld_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld_i64, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_ld_i64, args[0], args[1], args[2]);
+         break;
+-
++
+     case INDEX_op_ld32s_i64:
+-        tcg_out_ldst_gadget(s, gadget_ld32s_i64, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_ld32s_i64, args[0], args[1], args[2]);
+         break;
+ 
+ 
+@@ -721,155 +875,169 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+  */
+     case INDEX_op_st8_i32:
+     case INDEX_op_st8_i64:
+-        tcg_out_ldst_gadget(s, gadget_st8, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_st8, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_st16_i32:
+     case INDEX_op_st16_i64:
+-        tcg_out_ldst_gadget(s, gadget_st16, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_st16, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_st_i32:
+     case INDEX_op_st32_i64:
+-        tcg_out_ldst_gadget(s, gadget_st_i32, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_st_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_st_i64:
+-        tcg_out_ldst_gadget(s, gadget_st_i64, args[0], args[1], args[2]);
++        tcg_out_ldst_gadget(s, gadget_st_i64, args[0], args[1], args[2]);
+         break;
+ 
+     /**
+      * Arithmetic instructions.
+      */
+ 
+-    case INDEX_op_add_i32:
+-        tcg_out_ternary_gadget(s, gadget_add_i32, args[0], args[1], args[2]);
++    case INDEX_op_add_i32:
++        tcg_out_ternary_gadget(s, gadget_add_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_sub_i32:
+-        tcg_out_ternary_gadget(s, gadget_sub_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_sub_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_mul_i32:
+-        tcg_out_ternary_gadget(s, gadget_mul_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_mul_i32, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_nand_i32: /* Optional (TCG_TARGET_HAS_nand_i32). */
++        tcg_out_ternary_gadget(s, gadget_nand_i32, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_nor_i32: /* Optional (TCG_TARGET_HAS_nor_i32). */
++        tcg_out_ternary_gadget(s, gadget_nor_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_and_i32:
+-        tcg_out_ternary_gadget(s, gadget_and_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_and_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_andc_i32: /* Optional (TCG_TARGET_HAS_andc_i32). */
+-        tcg_out_ternary_gadget(s, gadget_andc_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_andc_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_orc_i32: /* Optional (TCG_TARGET_HAS_orc_i32). */
+-        tcg_out_ternary_gadget(s, gadget_orc_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_orc_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_eqv_i32: /* Optional (TCG_TARGET_HAS_eqv_i32). */
+-        tcg_out_ternary_gadget(s, gadget_eqv_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_eqv_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_or_i32:
+-        tcg_out_ternary_gadget(s, gadget_or_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_or_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_xor_i32:
+-        tcg_out_ternary_gadget(s, gadget_xor_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_xor_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_shl_i32:
+-        tcg_out_ternary_gadget(s, gadget_shl_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_shl_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_shr_i32:
+-        tcg_out_ternary_gadget(s, gadget_shr_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_shr_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_sar_i32:
+-        tcg_out_ternary_gadget(s, gadget_sar_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_sar_i32, args[0], args[1], args[2]);
+         break;
+ 
+-    //case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
+-    //    tcg_out_ternary_gadget(s, gadget_rotr_i32, args[0], args[1], args[2]);
+-    //    break;
++    case INDEX_op_rotr_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
++        tcg_out_ternary_gadget(s, gadget_rotr_i32, args[0], args[1], args[2]);
++        break;
+ 
+-    //case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
+-    //    tcg_out_ternary_gadget(s, gadget_rotl_i32, args[0], args[1], args[2]);
++    case INDEX_op_rotl_i32: /* Optional (TCG_TARGET_HAS_rot_i32). */
++        tcg_out_ternary_gadget(s, gadget_rotl_i32, args[0], args[1], args[2]);
++        break;
+ 
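// The newly enabled rotate cases are worth a note: AArch64 has no rotate-left
// instruction (only ROR/RORV), so a rotate-left is conventionally lowered as a
// rotate-right by the complemented amount. A scalar sketch of the identity the
// rotl gadgets presumably rely on (the gadget bodies themselves aren't shown
// in this hunk):
#include <stdint.h>

static inline uint32_t rotl32(uint32_t x, unsigned n)
{
    n &= 31;
    /* rotl(x, n) == ror(x, (32 - n) & 31) */
    return (x << n) | (x >> ((32 - n) & 31));
}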
+     case INDEX_op_add_i64:
+-        tcg_out_ternary_gadget(s, gadget_add_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_add_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_sub_i64:
+-        tcg_out_ternary_gadget(s, gadget_sub_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_sub_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_mul_i64:
+-        tcg_out_ternary_gadget(s, gadget_mul_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_mul_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_and_i64:
+-        tcg_out_ternary_gadget(s, gadget_and_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_and_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_andc_i64: /* Optional (TCG_TARGET_HAS_andc_i64). */
+-        tcg_out_ternary_gadget(s, gadget_andc_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_andc_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_orc_i64: /* Optional (TCG_TARGET_HAS_orc_i64). */
+-        tcg_out_ternary_gadget(s, gadget_orc_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_orc_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_eqv_i64: /* Optional (TCG_TARGET_HAS_eqv_i64). */
+-        tcg_out_ternary_gadget(s, gadget_eqv_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_eqv_i64, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */
++        tcg_out_ternary_gadget(s, gadget_nand_i64, args[0], args[1], args[2]);
+         break;
+ 
+-    //case INDEX_op_nand_i64: /* Optional (TCG_TARGET_HAS_nand_i64). */
+-    //case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */
++    case INDEX_op_nor_i64: /* Optional (TCG_TARGET_HAS_nor_i64). */
++        tcg_out_ternary_gadget(s, gadget_nor_i64, args[0], args[1], args[2]);
++        break;
+ 
+     case INDEX_op_or_i64:
+-        tcg_out_ternary_gadget(s, gadget_or_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_or_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_xor_i64:
+-        tcg_out_ternary_gadget(s, gadget_xor_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_xor_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_shl_i64:
+-        tcg_out_ternary_gadget(s, gadget_shl_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_shl_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_shr_i64:
+-        tcg_out_ternary_gadget(s, gadget_shr_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_shr_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_sar_i64:
+-        tcg_out_ternary_gadget(s, gadget_sar_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_sar_i64, args[0], args[1], args[2]);
+         break;
+ 
+-    //case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
+-    //    tcg_out_ternary_gadget(s, gadget_rotl_i64, args[0], args[1], args[2]);
+-    //    break;
++    case INDEX_op_rotl_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
++        tcg_out_ternary_gadget(s, gadget_rotl_i64, args[0], args[1], args[2]);
++        break;
+ 
+-    //case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
+-    //    tcg_out_ternary_gadget(s, gadget_rotr_i64, args[0], args[1], args[2]);
+-    //    break;
++    case INDEX_op_rotr_i64: /* Optional (TCG_TARGET_HAS_rot_i64). */
++        tcg_out_ternary_gadget(s, gadget_rotr_i64, args[0], args[1], args[2]);
++        break;
+ 
+     case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
+-        tcg_out_ternary_gadget(s, gadget_div_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_div_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
+-        tcg_out_ternary_gadget(s, gadget_divu_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_divu_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
+-        tcg_out_ternary_gadget(s, gadget_rem_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_rem_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_remu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
+-        tcg_out_ternary_gadget(s, gadget_remu_i64, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_remu_i64, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_brcond_i64:
+@@ -898,7 +1066,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         // helps the processor's branch prediction be less "squished", as not every
+         // branch is going through the same instruction.
+         tcg_out_ternary_gadget(s, gadget, last_brcond_i64, args[0], args[1]);
+-        last_brcond_i64 = (last_brcond_i64 + 1) % TCG_TARGET_NB_REGS;
++        last_brcond_i64 = (last_brcond_i64 + 1) % TCG_TARGET_GP_REGS;
+ 
+         // Branch target immediate.
+         tcti_out_label(s, arg_label(args[3]));
+@@ -928,6 +1096,14 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         tcg_out_binary_gadget(s, gadget_neg_i64, args[0], args[1]);
+         break;
+ 
++    case INDEX_op_clz_i64: /* Optional (TCG_TARGET_HAS_clz_i64). */
++        tcg_out_ternary_gadget(s, gadget_clz_i64, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_ctz_i64: /* Optional (TCG_TARGET_HAS_ctz_i64). */
++        tcg_out_ternary_gadget(s, gadget_ctz_i64, args[0], args[1], args[2]);
++        break;
++
+     case INDEX_op_ext8s_i64: /* Optional (TCG_TARGET_HAS_ext8s_i64). */
+         tcg_out_binary_gadget(s, gadget_ext8s_i64, args[0], args[1]);
+         break;
+@@ -956,10 +1132,26 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         tcg_out_binary_gadget(s, gadget_ext32u_i64, args[0], args[1]);
+         break;
+ 
++    case INDEX_op_extrl_i64_i32:
++        tcg_out_binary_gadget(s, gadget_extrl, args[0], args[1]);
++        break;
++
++    case INDEX_op_extrh_i64_i32:
++        tcg_out_binary_gadget(s, gadget_extrh, args[0], args[1]);
++        break;
++
+     case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */
+         tcg_out_binary_gadget(s, gadget_neg_i32, args[0], args[1]);
+         break;
+ 
++    case INDEX_op_clz_i32: /* Optional (TCG_TARGET_HAS_clz_i32). */
++        tcg_out_ternary_gadget(s, gadget_clz_i32, args[0], args[1], args[2]);
++        break;
++
++    case INDEX_op_ctz_i32: /* Optional (TCG_TARGET_HAS_ctz_i32). */
++        tcg_out_ternary_gadget(s, gadget_ctz_i32, args[0], args[1], args[2]);
++        break;
++
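// AArch64 provides CLZ but no CTZ instruction, so the new ctz gadgets are
// presumably built on the usual CTZ(x) = CLZ(RBIT(x)) construction (their
// bodies aren't shown in this hunk). Note the third TCG operand: the value
// substituted when the input is zero. A scalar model:
#include <stdint.h>

static unsigned ctz32_model(uint32_t x, unsigned zero_value)
{
    if (x == 0) {
        return zero_value;                       /* args[2] in the TCG op */
    }
    uint32_t reversed = 0;                       /* RBIT: mirror all 32 bits */
    for (int i = 0; i < 32; i++) {
        reversed = (reversed << 1) | ((x >> i) & 1);
    }
    return __builtin_clz(reversed);              /* CLZ of the mirrored value */
}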
+     case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */
+         tcg_out_binary_gadget(s, gadget_not_i32, args[0], args[1]);
+         break;
+@@ -973,19 +1165,19 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         break;
+ 
+     case INDEX_op_div_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
+-        tcg_out_ternary_gadget(s, gadget_div_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_div_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_divu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
+-        tcg_out_ternary_gadget(s, gadget_divu_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_divu_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_rem_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
+-        tcg_out_ternary_gadget(s, gadget_rem_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_rem_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_remu_i32: /* Optional (TCG_TARGET_HAS_div_i32). */
+-        tcg_out_ternary_gadget(s, gadget_remu_i32, args[0], args[1], args[2]);
++        tcg_out_ternary_gadget(s, gadget_remu_i32, args[0], args[1], args[2]);
+         break;
+ 
+     case INDEX_op_brcond_i32:
+@@ -1014,7 +1206,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         // helps the processor's branch prediction be less "squished", as not every
+         // branch is going through the same instruction.
+         tcg_out_ternary_gadget(s, gadget, last_brcond_i32, args[0], args[1]);
+-        last_brcond_i32 = (last_brcond_i32 + 1) % TCG_TARGET_NB_REGS;
++        last_brcond_i32 = (last_brcond_i32 + 1) % TCG_TARGET_GP_REGS;
+ 
+         // Branch target immediate.
+         tcti_out_label(s, arg_label(args[3]));
+@@ -1031,6 +1223,8 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         void *gadget;
+ 
+         switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
++            case -32:  LD_MEMOP_HANDLER(gadget, args[2], off32_i32, a_bits, s_bits); break;
++            case -48:  LD_MEMOP_HANDLER(gadget, args[2], off48_i32, a_bits, s_bits); break;
+             case -64:  LD_MEMOP_HANDLER(gadget, args[2], off64_i32, a_bits, s_bits); break;
+             case -96:  LD_MEMOP_HANDLER(gadget, args[2], off96_i32, a_bits, s_bits); break;
+             case -128: LD_MEMOP_HANDLER(gadget, args[2], off128_i32, a_bits, s_bits); break;
+@@ -1038,7 +1232,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         }
+ 
+         // Args:
+-        // - an immediate32 encodes our operation index
++        // - an immediate32 encodes our operation index
+         tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+         tcg_out64(s, args[2]); // TODO: fix encoding to be 4b
+         break;
+@@ -1052,43 +1246,31 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+         void *gadget;
+ 
+-        // Special optimization case: if we have an operation/target of 0x3A,
+-        // this is a common case. Delegate to our special-case handler.
+-        if (args[2] == 0x3a) {
+-            switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
+-
+-                case -64:
+-                    gadget = (a_bits >= s_bits) ?
+-                        gadget_qemu_ld_leq_aligned_mode3a_off64_i64 :
+-                        gadget_qemu_ld_leq_unaligned_mode3a_off64_i64;
+-                    break;
+-                case -96:
+-                    gadget = (a_bits >= s_bits) ?
+-                        gadget_qemu_ld_leq_aligned_mode3a_off96_i64 :
+-                        gadget_qemu_ld_leq_unaligned_mode3a_off96_i64;
+-                    break;
+-                case -128:
+-                    gadget = (a_bits >= s_bits) ?
+-                        gadget_qemu_ld_leq_aligned_mode3a_off128_i64 :
+-                        gadget_qemu_ld_leq_unaligned_mode3a_off128_i64;
+-                    break;
+-
+-                default:
+-                    gadget = gadget_qemu_ld_leq_slowpath_mode3a_off0_i64;
+-                    break;
+-            }
++        // Special optimization case: if we have a common operation/target,
++        // delegate to our special-case handler.
++        if (args[2] == 0x02) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], ld_ub, mode02)
+             tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+-        }
++        } else if (args[2] == 0x32) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], ld_leq, mode32)
++            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
++        } else if (args[2] == 0x3a) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], ld_leq, mode3a)
++            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
++        }
+         // Otherwise, handle the generic case.
+         else {
+             switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
++                case -32:  LD_MEMOP_HANDLER(gadget, args[2], off32_i64, a_bits, s_bits); break;
++                case -48:  LD_MEMOP_HANDLER(gadget, args[2], off48_i64, a_bits, s_bits); break;
+                 case -64:  LD_MEMOP_HANDLER(gadget, args[2], off64_i64, a_bits, s_bits); break;
+                 case -96:  LD_MEMOP_HANDLER(gadget, args[2], off96_i64, a_bits, s_bits); break;
+                 case -128: LD_MEMOP_HANDLER(gadget, args[2], off128_i64, a_bits, s_bits); break;
+                 default:   LD_MEMOP_LOOKUP(gadget, args[2], slowpath_off0_i64); break;
+             }
++
+             // Args:
+-            // - an immediate32 encodes our operation index
++            // - an immediate32 encodes our operation index
+             tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+             tcg_out64(s, args[2]); // TODO: fix encoding to be 4b
+         }
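// For orientation: args[2] in the qemu_ld/st ops is a memop index that packs
// the MemOp into the upper bits and the softmmu MMU index into the low nibble
// (QEMU's make_memop_idx convention). That's what makes 0x02, 0x32, and 0x3a
// worth special-casing above: 0x02 is an unsigned-byte access, while 0x32 and
// 0x3a are little-endian 64-bit accesses under two common MMU indices.
static inline unsigned oi_memop(unsigned oi)  { return oi >> 4; }  /* 0x3a -> 0x3: 64-bit LE */
static inline unsigned oi_mmuidx(unsigned oi) { return oi & 15; }  /* 0x3a -> 0xa            */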
+@@ -1105,6 +1287,8 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+         void *gadget;
+ 
+         switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
++            case -32:  ST_MEMOP_HANDLER(gadget, args[2], off32_i32, a_bits, s_bits); break;
++            case -48:  ST_MEMOP_HANDLER(gadget, args[2], off48_i32, a_bits, s_bits); break;
+             case -64:  ST_MEMOP_HANDLER(gadget, args[2], off64_i32, a_bits, s_bits); break;
+             case -96:  ST_MEMOP_HANDLER(gadget, args[2], off96_i32, a_bits, s_bits); break;
+             case -128: ST_MEMOP_HANDLER(gadget, args[2], off128_i32, a_bits, s_bits); break;
+@@ -1113,7 +1297,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+         // Args:
+         // - our gadget encodes the target and address registers
+-        // - an immediate32 encodes our operation index
++        // - an immediate32 encodes our operation index
+         tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+         tcg_out64(s, args[2]); // FIXME: double encoded
+         break;
+@@ -1127,36 +1311,23 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+         void *gadget;
+ 
+-        // Special optimization case: if we have an operation/target of 0x3A,
+-        // this is a common case. Delegate to our special-case handler.
+-        if (args[2] == 0x3a) {
+-            switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
+-
+-                case -64:
+-                    gadget = (a_bits >= s_bits) ?
+-                        gadget_qemu_st_leq_aligned_mode3a_off64_i64 :
+-                        gadget_qemu_st_leq_unaligned_mode3a_off64_i64;
+-                    break;
+-                case -96:
+-                    gadget = (a_bits >= s_bits) ?
+-                        gadget_qemu_st_leq_aligned_mode3a_off96_i64 :
+-                        gadget_qemu_st_leq_unaligned_mode3a_off96_i64;
+-                    break;
+-                case -128:
+-                    gadget = (a_bits >= s_bits) ?
+-                        gadget_qemu_st_leq_aligned_mode3a_off128_i64 :
+-                        gadget_qemu_st_leq_unaligned_mode3a_off128_i64;
+-                    break;
+-
+-                default:
+-                    gadget = gadget_qemu_st_leq_slowpath_mode3a_off0_i64;
+-                    break;
+-            }
++        // Special optimization case: if we have a common operation/target,
++        // delegate to our special-case handler.
++        if (args[2] == 0x02) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], st_ub, mode02)
+             tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+-        }
++        } else if (args[2] == 0x32) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], st_leq, mode32)
++            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
++        } else if (args[2] == 0x3a) {
++            LOOKUP_SPECIAL_CASE_LDST_GADGET(args[2], st_leq, mode3a)
++            tcg_out_binary_gadget(s, gadget, args[0], args[1]);
++        }
+         // Otherwise, handle the generic case.
+         else {
+             switch(TLB_MASK_TABLE_OFS(get_mmuidx(args[2]))) {
++                case -32:  ST_MEMOP_HANDLER(gadget, args[2], off32_i64, a_bits, s_bits); break;
++                case -48:  ST_MEMOP_HANDLER(gadget, args[2], off48_i64, a_bits, s_bits); break;
+                 case -64:  ST_MEMOP_HANDLER(gadget, args[2], off64_i64, a_bits, s_bits); break;
+                 case -96:  ST_MEMOP_HANDLER(gadget, args[2], off96_i64, a_bits, s_bits); break;
+                 case -128: ST_MEMOP_HANDLER(gadget, args[2], off128_i64, a_bits, s_bits); break;
+@@ -1165,7 +1336,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+ 
+             // Args:
+             // - our gadget encodes the target and address registers
+-            // - an immediate32 encodes our operation index
++            // - an immediate32 encodes our operation index
+             tcg_out_binary_gadget(s, gadget, args[0], args[1]);
+             tcg_out64(s, args[2]); // FIXME: double encoded
+         }
+@@ -1183,7 +1354,7 @@ void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *con
+             [TCG_MO_LD_ST] = gadget_mb_ld,
+             [TCG_MO_LD_ST | TCG_MO_LD_LD] = gadget_mb_ld,
+         };
+-        tcg_out_nullary_gadget(s, sync[args[0] & TCG_MO_ALL]);
++        tcg_out_gadget(s, sync[args[0] & TCG_MO_ALL]);
+ 
+         break;
+     }
+@@ -1203,9 +1374,9 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
+                        intptr_t arg2)
+ {
+     if (type == TCG_TYPE_I32) {
+-        tcg_out_ldst_gadget(s, gadget_st_i32, arg, arg1, arg2);
++        tcg_out_ldst_gadget(s, gadget_st_i32, arg, arg1, arg2);
+     } else {
+-        tcg_out_ldst_gadget(s, gadget_st_i64, arg, arg1, arg2);
++        tcg_out_ldst_gadget(s, gadget_st_i64, arg, arg1, arg2);
+     }
+ }
+ 
+@@ -1221,19 +1392,629 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+     return ct & TCG_CT_CONST;
+ }
+ 
++/***************************
++ *  TCG Vector Operations  *
++ ***************************/
++
++//
++// Helper for emitting DUPI (immediate DUP) instructions.
++//
++#define tcg_out_dupi_gadget(s, name, q, rd, op, cmode, arg) \
++    if (q) { \
++        tcg_out_gadget(s, gadget_ ## name ## _cmode_ ## cmode ## _op ## op ## _q1[rd][arg]); \
++    } else { \
++        tcg_out_gadget(s, gadget_ ## name ## _cmode_ ## cmode ## _op ## op ## _q0[rd][arg]); \
++    }
++
++
++//
++// Helpers for emitting D/Q variant instructions.
++//
++#define tcg_out_dq_gadget(s, name, arity, is_q, args...) \
++    if (is_q) { \
++        tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _q, args); \
++    } else { \
++        tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _d, args); \
++    }
++
++#define tcg_out_unary_dq_gadget(s, name, is_q, a) \
++    tcg_out_dq_gadget(s, name, unary, is_q, a)
++#define tcg_out_binary_dq_gadget(s, name, is_q, a, b) \
++    tcg_out_dq_gadget(s, name, binary, is_q, a, b)
++#define tcg_out_ternary_dq_gadget(s, name, is_q, a, b, c) \
++    tcg_out_dq_gadget(s, name, ternary, is_q, a, b, c)
++
++
++//
++// Helper for emitting the gadget appropriate for a vector's size.
++//
++#define tcg_out_sized_vector_gadget(s, name, arity, vece, args...) \
++    switch(vece) { \
++        case MO_8: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8b, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _16b, args); \
++            } \
++            break; \
++        case MO_16: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4h, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8h, args); \
++            } \
++            break; \
++        case MO_32: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _2s, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4s, args); \
++            } \
++            break; \
++        case MO_64: \
++            if (type == TCG_TYPE_V128) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _2d, args); \
++            } \
++            else { \
++                g_assert_not_reached(); \
++            } \
++            break; \
++        default: \
++            g_assert_not_reached(); \
++    }
++#define tcg_out_sized_vector_gadget_no64(s, name, arity, vece, args...) \
++    switch(vece) { \
++        case MO_8: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8b, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _16b, args); \
++            } \
++            break; \
++        case MO_16: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4h, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _8h, args); \
++            } \
++            break; \
++        case MO_32: \
++            if (type == TCG_TYPE_V64) { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _2s, args); \
++            } else { \
++                tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _4s, args); \
++            } \
++            break; \
++        default: \
++            g_assert_not_reached(); \
++    }
++
++
++#define tcg_out_unary_vector_gadget(s, name, vece, a) \
++    tcg_out_sized_vector_gadget(s, name, unary, vece, a)
++#define tcg_out_binary_vector_gadget(s, name, vece, a, b) \
++    tcg_out_sized_vector_gadget(s, name, binary, vece, a, b)
++#define tcg_out_ternary_vector_gadget(s, name, vece, a, b, c) \
++    tcg_out_sized_vector_gadget(s, name, ternary, vece, a, b, c)
++
++#define tcg_out_ternary_vector_gadget_no64(s, name, vece, a, b, c) \
++    tcg_out_sized_vector_gadget_no64(s, name, ternary, vece, a, b, c)
++
++
++#define tcg_out_ternary_vector_gadget_with_scalar(s, name, is_scalar, vece, a, b, c) \
++    if (is_scalar) { \
++        tcg_out_ternary_gadget(s, gadget_ ## name ## _scalar, w0, w1, w2); \
++    } else { \
++        tcg_out_ternary_vector_gadget(s, name, vece, w0, w1, w2); \
++    }
++
++
++/* Return true if v16 is a valid 16-bit shifted immediate. */
++static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
++{
++    if (v16 == (v16 & 0xff)) {
++        *cmode = 0x8;
++        *imm8 = v16 & 0xff;
++        return true;
++    } else if (v16 == (v16 & 0xff00)) {
++        *cmode = 0xa;
++        *imm8 = v16 >> 8;
++        return true;
++    }
++    return false;
++}
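// Two quick worked examples of is_shimm16: 0x0012 has only its low byte set,
// so it encodes as cmode 0x8 / imm8 0x12 (MOVI with no shift); 0x1200 has only
// its high byte set, so it encodes as cmode 0xa / imm8 0x12 (MOVI, LSL #8).
// A value like 0x1234 sets both bytes, is rejected, and must be synthesized in
// two steps, as tcg_out_optimized_dupi_vec does further down.
#include <assert.h>

static void is_shimm16_examples(void)
{
    int cmode, imm8;
    assert(is_shimm16(0x0012, &cmode, &imm8) && cmode == 0x8 && imm8 == 0x12);
    assert(is_shimm16(0x1200, &cmode, &imm8) && cmode == 0xa && imm8 == 0x12);
    assert(!is_shimm16(0x1234, &cmode, &imm8));
}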
++
++
++/** Core vector operation emission. */
++static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece,
++                           const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS])
++{
++    TCGType type = vecl + TCG_TYPE_V64;
++    TCGArg r0, r1, r2, r3, w0, w1, w2, w3;
++
++    // Typing flags for vector operations.
++    bool is_v128 = (type == TCG_TYPE_V128);
++    bool is_scalar = !is_v128 && (vece == MO_64);
++
++    // Argument shortcuts.
++    r0 = args[0];
++    r1 = args[1];
++    r2 = args[2];
++    r3 = args[3];
++
++    // Offset argument shortcuts; offset to convert register numbers to gadget numbers.
++    w0 = args[0] - TCG_REG_V16;
++    w1 = args[1] - TCG_REG_V16;
++    w2 = args[2] - TCG_REG_V16;
++    w3 = args[3] - TCG_REG_V16;
++
++    // Argument shortcuts, as signed.
++    int64_t signed_offset_arg = (int32_t)args[2];
++
++    switch (opc) {
++
++    // Load memory -> vector: followed by a 64-bit offset immediate
++    case INDEX_op_ld_vec:
++        tcg_out_binary_dq_gadget(s, ldr, is_v128, w0, r1);
++        tcg_out64(s, signed_offset_arg);
++        break;
++
++    // Store vector -> memory: followed by a 64-bit offset immediate
++    case INDEX_op_st_vec:
++        tcg_out_binary_dq_gadget(s, str, is_v128, w0, r1);
++        tcg_out64(s, signed_offset_arg);
++        break;
++
++    // Duplicate memory to all vector elements.
++    case INDEX_op_dupm_vec:
++        // DUPM handles normalization itself; pass arguments raw.
++        tcg_out_dupm_vec(s, type, vece, r0, r1, r2);
++        break;
++
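// The r0..r3/w0..w3 split above exists because the vector gadget tables are
// indexed from zero, while TCG numbers the allocatable vector registers from
// TCG_REG_V16 upward; subtracting the base maps V16 -> slot 0, V17 -> slot 1,
// and so on. A one-line model of that invariant (table size assumed to be 16):
static inline unsigned vec_gadget_slot(unsigned tcg_reg, unsigned v16_base)
{
    return tcg_reg - v16_base;   /* caller guarantees V16 <= tcg_reg < V16 + 16 */
}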
++    case INDEX_op_add_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, add, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_sub_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, sub, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_mul_vec: // optional
++        tcg_out_ternary_vector_gadget_no64(s, mul, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_neg_vec: // optional
++        tcg_out_binary_vector_gadget(s, neg, vece, w0, w1);
++        break;
++
++    case INDEX_op_abs_vec: // optional
++        tcg_out_binary_vector_gadget(s, abs, vece, w0, w1);
++        break;
++
++    case INDEX_op_and_vec: // optional
++        tcg_out_ternary_dq_gadget(s, and, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_or_vec:
++        tcg_out_ternary_dq_gadget(s, or, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_andc_vec:
++        tcg_out_ternary_dq_gadget(s, andc, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_orc_vec: // optional
++        tcg_out_ternary_dq_gadget(s, orc, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_xor_vec:
++        tcg_out_ternary_dq_gadget(s, xor, is_v128, w0, w1, w2);
++        break;
++
++    case INDEX_op_ssadd_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, ssadd, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_sssub_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, sssub, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_usadd_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, usadd, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_ussub_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, ussub, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_smax_vec:
++        tcg_out_ternary_vector_gadget_no64(s, smax, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_smin_vec:
++        tcg_out_ternary_vector_gadget_no64(s, smin, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_umax_vec:
++        tcg_out_ternary_vector_gadget_no64(s, umax, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_umin_vec:
++        tcg_out_ternary_vector_gadget_no64(s, umin, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_not_vec: // optional
++        tcg_out_binary_dq_gadget(s, not, is_v128, w0, w1);
++        break;
++
++    case INDEX_op_shlv_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, shlv, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_aa64_sshl_vec:
++        tcg_out_ternary_vector_gadget_with_scalar(s, sshl, is_scalar, vece, w0, w1, w2);
++        break;
++
++    case INDEX_op_cmp_vec:
++        switch (args[3]) {
++            case TCG_COND_EQ:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmeq, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_NE:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmeq, is_scalar, vece, w0, w1, w2);
++                tcg_out_binary_dq_gadget(s, not, is_v128, w0, w0);
++                break;
++            case TCG_COND_GT:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmgt, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_LE:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmgt, is_scalar, vece, w0, w2, w1);
++                break;
++            case TCG_COND_GE:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmge, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_LT:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmge, is_scalar, vece, w0, w2, w1);
++                break;
++            case TCG_COND_GTU:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmhi, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_LEU:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmhi, is_scalar, vece, w0, w2, w1);
++                break;
++            case TCG_COND_GEU:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmhs, is_scalar, vece, w0, w1, w2);
++                break;
++            case TCG_COND_LTU:
++                tcg_out_ternary_vector_gadget_with_scalar(s, cmhs, is_scalar, vece, w0, w2, w1);
++                break;
++            default:
++                g_assert_not_reached();
++        }
++        break;
++
++    case INDEX_op_bitsel_vec: // optional
++    {
++        if (r0 == r3) {
++            tcg_out_ternary_dq_gadget(s, bit, is_v128, w0, w2, w1);
++        } else if (r0 == r2) {
++            tcg_out_ternary_dq_gadget(s, bif, is_v128, w0, w3, w1);
++        } else {
++            if (r0 != r1) {
++                tcg_out_mov(s, type, r0, r1);
++            }
++            tcg_out_ternary_dq_gadget(s, bsl, is_v128, w0, w2, w3);
++        }
++        break;
++    }
++
++    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov. */
++    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec. */
++    default:
++        g_assert_not_reached();
++    }
++}
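// Worth spelling out from the cmp_vec case above: AArch64 SIMD only offers
// "equal" and "greater"-flavored compares, so the remaining conditions are
// synthesized by swapping operands (LT/LE/LTU/LEU) or inverting the result
// (NE = NOT(CMEQ)). A compact restatement of the mapping used there:
//
//   EQ  -> CMEQ a, b        NE  -> NOT(CMEQ a, b)
//   GT  -> CMGT a, b        LE  -> CMGT b, a
//   GE  -> CMGE a, b        LT  -> CMGE b, a
//   GTU -> CMHI a, b        LEU -> CMHI b, a
//   GEU -> CMHS a, b        LTU -> CMHS b, a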
++
++
++int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
++{
++    switch (opc) {
++    case INDEX_op_add_vec:
++    case INDEX_op_sub_vec:
++    case INDEX_op_and_vec:
++    case INDEX_op_or_vec:
++    case INDEX_op_xor_vec:
++    case INDEX_op_andc_vec:
++    case INDEX_op_orc_vec:
++    case INDEX_op_neg_vec:
++    case INDEX_op_abs_vec:
++    case INDEX_op_not_vec:
++    case INDEX_op_cmp_vec:
++    case INDEX_op_ssadd_vec:
++    case INDEX_op_sssub_vec:
++    case INDEX_op_usadd_vec:
++    case INDEX_op_ussub_vec:
++    case INDEX_op_shlv_vec:
++    case INDEX_op_bitsel_vec:
++        return 1;
++    case INDEX_op_rotli_vec:
++    case INDEX_op_shrv_vec:
++    case INDEX_op_sarv_vec:
++    case INDEX_op_rotlv_vec:
++    case INDEX_op_rotrv_vec:
++        return -1;
++    case INDEX_op_mul_vec:
++    case INDEX_op_smax_vec:
++    case INDEX_op_smin_vec:
++    case INDEX_op_umax_vec:
++    case INDEX_op_umin_vec:
++        return vece < MO_64;
++
++    default:
++        return 0;
++    }
++}
++
++void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
++                       TCGArg a0, ...)
++{
++    va_list va;
++    TCGv_vec v0, v1, v2, t1, t2, c1;
++    TCGArg a2;
++
++
++    va_start(va, a0);
++    v0 = temp_tcgv_vec(arg_temp(a0));
++    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
++    a2 = va_arg(va, TCGArg);
++    va_end(va);
++
++    switch (opc) {
++    case INDEX_op_shrv_vec:
++    case INDEX_op_sarv_vec:
++        /* Right shifts are negative left shifts for AArch64. */
++        v2 = temp_tcgv_vec(arg_temp(a2));
++        t1 = tcg_temp_new_vec(type);
++        tcg_gen_neg_vec(vece, t1, v2);
++        opc = (opc == INDEX_op_shrv_vec
++               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
++        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
++        tcg_temp_free_vec(t1);
++        break;
++
++    case INDEX_op_rotlv_vec:
++        v2 = temp_tcgv_vec(arg_temp(a2));
++        t1 = tcg_temp_new_vec(type);
++        c1 = tcg_constant_vec(type, vece, 8 << vece);
++        tcg_gen_sub_vec(vece, t1, v2, c1);
++        /* Right shifts are negative left shifts for AArch64. */
++        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
++        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
++        tcg_gen_or_vec(vece, v0, v0, t1);
++        tcg_temp_free_vec(t1);
++        break;
++
++    case INDEX_op_rotrv_vec:
++        v2 = temp_tcgv_vec(arg_temp(a2));
++        t1 = tcg_temp_new_vec(type);
++        t2 = tcg_temp_new_vec(type);
++        c1 = tcg_constant_vec(type, vece, 8 << vece);
++        tcg_gen_neg_vec(vece, t1, v2);
++        tcg_gen_sub_vec(vece, t2, c1, v2);
++        /* Right shifts are negative left shifts for AArch64. */
++        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
++        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
++                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
++        tcg_gen_or_vec(vece, v0, t1, t2);
++        tcg_temp_free_vec(t1);
++        tcg_temp_free_vec(t2);
++        break;
++
++    default:
++        g_assert_not_reached();
++    }
++}
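// The rotlv/rotrv expansions above lean on the identity
//     rotl(x, n) == shl(x, n) | shr(x, width - n)
// with the right shift expressed as a left shift by a negative amount, per the
// AArch64 SSHL/USHL convention. A scalar model of that convention (assuming
// 0 < n < 32, as the element-width subtraction guarantees for nonzero counts):
#include <stdint.h>

static inline uint32_t ushl32(uint32_t x, int amount)
{
    /* positive amounts shift left; negative amounts shift right */
    return (amount >= 0) ? (x << amount) : (x >> -amount);
}
/* so the rotlv expansion computes: ushl32(x, n) | ushl32(x, n - 32) */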
++
++
++/* Generate DUPI (move immediate) vector ops. */
++static bool tcg_out_optimized_dupi_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, int64_t v64)
++{
++    bool q = (type == TCG_TYPE_V128);
++    int cmode, imm8, i;
++
++    // If we're copying an 8b immediate, we implicitly have a simple gadget for this,
++    // since there are only 256 possible values * 16 registers. Emit a MOVI gadget directly.
++    if (vece == MO_8) {
++        imm8 = (uint8_t)v64;
++        tcg_out_dupi_gadget(s, movi, q, rd, 0, e, imm8);
++        return true;
++    }
++
++    // Otherwise, if we have a value that's all 0x00 and 0xFF bytes,
++    // we can use the scalar variant of MOVI (op=1, cmode=e), which handles
++    // that case directly.
++    for (i = imm8 = 0; i < 8; i++) {
++        uint8_t byte = v64 >> (i * 8);
++        if (byte == 0xff) {
++            imm8 |= 1 << i;
++        } else if (byte != 0) {
++            goto fail_bytes;
++        }
++    }
++    tcg_out_dupi_gadget(s, movi, q, rd, 1, e, imm8);
++    return true;
++    fail_bytes:
++
++    // Handle 16-bit moves.
++    if (vece == MO_16) {
++        uint16_t v16 = v64;
++
++        // Check to see if we have a value representable as a MOVI imm8, possibly via a shift.
++        if (is_shimm16(v16, &cmode, &imm8)) {
++            // Output the correct instruction CMode for either a regular MOVI (8) or a LSL8 MOVI (a).
++            if (cmode == 0x8) {
++                tcg_out_dupi_gadget(s, movi, q, rd, 0, 8, imm8);
++            } else {
++                tcg_out_dupi_gadget(s, movi, q, rd, 0, a, imm8);
++            }
++            return true;
++        }
++
++        // Check to see if we have a value representable as an inverted MOVI imm8, possibly via a shift.
++        if (is_shimm16(~v16, &cmode, &imm8)) {
++            // Output the correct instruction CMode for either a regular MVNI (8) or a LSL8 MVNI (a).
++            if (cmode == 0x8) {
++                tcg_out_dupi_gadget(s, mvni, q, rd, 0, 8, imm8);
++            } else {
++                tcg_out_dupi_gadget(s, mvni, q, rd, 0, a, imm8);
++            }
++            return true;
++        }
++
++        // If we can't perform either optimization, we'll need to do this in two steps.
++        // Normally, we'd handle this with a single gadget, but that'd result in needing -way-
++        // too many pre-made gadgets. We'll emit two, instead.
++        tcg_out_dupi_gadget(s, movi, q, rd, 0, 8, v16 & 0xff);
++        tcg_out_dupi_gadget(s, orr, q, rd, 0, a, v16 >> 8);
++        return true;
++    }
++
++    // FIXME: implement 32-bit move optimizations
++
++
++    // Try to create optimized 32-bit moves.
++    //else if (vece == MO_32) {
++    //    uint32_t v32 = v64;
++    //    uint32_t n32 = ~v32;
++
++    //    if (is_shimm32(v32, &cmode, &imm8) ||
++    //        is_soimm32(v32, &cmode, &imm8) ||
++    //        is_fimm32(v32, &cmode, &imm8)) {
++    //        tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
++    //        return;
++    //    }
++    //    if (is_shimm32(n32, &cmode, &imm8) ||
++    //        is_soimm32(n32, &cmode, &imm8)) {
++    //        tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
++    //        return;
++    //    }
++
++    //    //
++    //    // Restrict the set of constants to those we can load with
++    //    // two instructions. Others we load from the pool.
++    //    //
++    //    i = is_shimm32_pair(v32, &cmode, &imm8);
++    //    if (i) {
++    //        tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
++    //        tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
++    //        return;
++    //    }
++    //    i = is_shimm32_pair(n32, &cmode, &imm8);
++    //    if (i) {
++    //        tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
++    //        tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
++    //        return;
++    //    }
++    //}
++
++    return false;
++}
++
++
++/* Emits instructions that can load an immediate into a vector. */
++static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, int64_t v64)
++{
++    // Convert Rd into a simple gadget number.
++    rd = rd - (TCG_REG_V16);
++
++    // First, try to create an optimized implementation, if possible.
++    if (tcg_out_optimized_dupi_vec(s, type, vece, rd, v64)) {
++        return;
++    }
++
++    // If we didn't, we'll need to load the full vector from memory.
++    // Emit it into our bytecode stream as an immediate, which we'll then
++    // load inside the gadget.
++    if (type == TCG_TYPE_V128) {
++        tcg_out_unary_gadget(s, gadget_ldi_q, rd);
++        tcg_out64(s, v64);
++        tcg_out64(s, v64);
++    } else {
++        tcg_out_unary_gadget(s, gadget_ldi_d, rd);
++        tcg_out64(s, v64);
++    }
++}
++
++
++/* Emits instructions that can load a register into a vector. */
++static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg rd, TCGReg rs)
++{
++    // Compute the gadget index for the relevant vector register.
++    TCGReg wd = rd - (TCG_REG_V16);
++
++    // Emit a DUP gadget to handle the operation.
++    tcg_out_binary_vector_gadget(s, dup, vece, wd, rs);
++    return true;
++}
++
++static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg r, TCGReg base, intptr_t offset)
++{
++    int64_t extended_offset = (int32_t)offset;
++
++    // Convert the register into a simple register number for our gadgets.
++    r = r - TCG_REG_V16;
++
++    // Emit a DUPM gadget...
++    tcg_out_binary_vector_gadget(s, dupm, vece, r, base);
++
++    // ... and emit its int64 immediate offset.
++    tcg_out64(s, extended_offset);
++
++    return true;
++}
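// When no MOVI/MVNI form applies, tcg_out_dupi_vec above falls back to placing
// the literal directly in the bytecode stream, immediately after the gadget
// reference, for the gadget to pull in at run time. A sketch of the resulting
// stream layout (the emitter types here are illustrative, not the patch's own):
#include <stdint.h>

static void sketch_emit_ldi_q(uint64_t **stream, const void *gadget, uint64_t v64)
{
    *(*stream)++ = (uint64_t)(uintptr_t)gadget;   /* gadget_ldi_q entry point  */
    *(*stream)++ = v64;                           /* low 64 bits of the vector */
    *(*stream)++ = v64;                           /* high 64 bits (duplicated) */
}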
++
++
++/********************************
++ *  TCG Runtime & Platform Def  *
++ *******************************/
++
+ static void tcg_target_init(TCGContext *s)
+ {
+     /* The current code uses uint8_t for tcg operations. */
+     tcg_debug_assert(tcg_op_defs_max <= UINT8_MAX);
+ 
+-    /* Registers available for 32 bit operations. */
+-    tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1;
+-    /* Registers available for 64 bit operations. */
+-    tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1;
+-
+-    /* TODO: Which registers should be set here? */
+-    tcg_target_call_clobber_regs = BIT(TCG_TARGET_NB_REGS) - 1;
++    // Registers available for each type of operation.
++    tcg_target_available_regs[TCG_TYPE_I32] = TCG_MASK_GP_REGISTERS;
++    tcg_target_available_regs[TCG_TYPE_I64] = TCG_MASK_GP_REGISTERS;
++    tcg_target_available_regs[TCG_TYPE_V64] = TCG_MASK_VECTOR_REGISTERS;
++    tcg_target_available_regs[TCG_TYPE_V128] = TCG_MASK_VECTOR_REGISTERS;
++
++    TCGReg unclobbered_registers[] = {
++        // We don't use registers R16+ in our runtime, so we'll not bother protecting them.
++        TCG_REG_R16, TCG_REG_R17, TCG_REG_R18, TCG_REG_R19,
++        TCG_REG_R20, TCG_REG_R21, TCG_REG_R22, TCG_REG_R23,
++        TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
++        TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
++
++        // Per our calling convention.
++        TCG_REG_V8,  TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11,
++        TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
++    };
++
++    // Specify which registers are clobbered during call.
++    tcg_target_call_clobber_regs = -1ull;
++    for (unsigned i = 0; i < ARRAY_SIZE(unclobbered_registers); ++i) {
++        tcg_regset_reset_reg(tcg_target_call_clobber_regs, unclobbered_registers[i]);
++    }
+ 
++    // Specify which local registers we're reserving.
++    //
++    // Note that we only have to specify registers that are used in the runtime,
++    // and so not e.g. the register that contains AREG0, which can never be allocated.
+     s->reserved_regs = 0;
+     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+ 
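// The clobber setup above starts from "every register clobbered" (-1ull is a
// full 64-bit register set, matching the new TCG_TARGET_NB_REGS of 64) and
// then clears one bit per preserved register. A model of the reset primitive:
#include <stdint.h>

static inline uint64_t regset_clear(uint64_t set, unsigned reg)
{
    return set & ~(1ull << reg);   /* mark `reg` as call-preserved */
}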
+@@ -1292,8 +2073,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, const void *v_tb_
+ 
+         : [return_value] "=m" (return_value)
+ 
+-        : [areg0] "m" (env),
+-          [sp_value] "m" (sp_value),
++        : [areg0] "m" (env),
++          [sp_value] "m" (sp_value),
+           [start_tb_ptr] "m" (v_tb_ptr),
+           [pc_mirror] "m" (pc_mirror)
+ 
+@@ -1318,8 +2099,11 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, const void *v_tb_
+ /* Disassemble TCI bytecode. */
+ int print_insn_tcti(bfd_vma addr, disassemble_info *info)
+ {
++
++#ifdef TCTI_GADGET_RICH_DISASSEMBLY
+     Dl_info symbol_info = {};
+     char symbol_name[48];
++#endif
+ 
+     int status;
+     uint64_t block;
+@@ -1331,16 +2115,22 @@ int print_insn_tcti(bfd_vma addr, disassemble_info *info)
+         return -1;
+     }
+ 
++#ifdef TCTI_GADGET_RICH_DISASSEMBLY
+     // Most of our disassembly stream will be gadgets. Try to get their names, for nice output.
+     dladdr((void *)block, &symbol_info);
+ 
+     if(symbol_info.dli_sname != 0) {
+-        strlcpy(symbol_name, symbol_info.dli_sname, 47);
++        strncpy(symbol_name, symbol_info.dli_sname, sizeof(symbol_name));
++        symbol_name[sizeof(symbol_name) - 1] = 0;
+         info->fprintf_func(info->stream, "%s", symbol_name);
+     } else {
+-        info->fprintf_func(info->stream, "%016llx", block);
++        info->fprintf_func(info->stream, "%016lx", block);
+     }
+ 
++#else
++    info->fprintf_func(info->stream, "%016lx", block);
++#endif
++
+     return sizeof(block);
+ }
+ 
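// The rich-disassembly path works because each gadget is a linker-visible
// symbol: handing any address inside it to dladdr() recovers the symbol name.
// A self-contained sketch of that lookup (dladdr is a glibc/macOS extension):
#define _GNU_SOURCE
#include <dlfcn.h>
#include <stdio.h>

static void print_gadget_name(const void *gadget_addr)
{
    Dl_info info = {0};
    if (dladdr(gadget_addr, &info) && info.dli_sname != NULL) {
        printf("%s\n", info.dli_sname);   /* e.g. a name like "gadget_br" */
    } else {
        printf("%p\n", gadget_addr);      /* fall back to the raw address */
    }
}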
+diff --git a/tcg/aarch64-tcti/tcg-target.h b/tcg/aarch64-tcti/tcg-target.h
+index 7eb3bb1c3d..bf4e7e2772 100644
+--- a/tcg/aarch64-tcti/tcg-target.h
++++ b/tcg/aarch64-tcti/tcg-target.h
+@@ -56,8 +56,11 @@
+ // weird pseudo-native bytecode. We'll indicate that we're interpreted.
+ #define TCG_TARGET_INTERPRETER 1
+ 
++// Specify we'll handle direct jumps.
++#define TCG_TARGET_HAS_direct_jump 1
++
+ //
+-// Supported optional instructions.
++// Supported optional scalar instructions.
+ //
+ 
+ // Divs.
+@@ -78,23 +81,35 @@
+ #define TCG_TARGET_HAS_ext16u_i64       1
+ #define TCG_TARGET_HAS_ext32u_i64       1
+ 
+-// Logicals.
++// Register extractions.
++#define TCG_TARGET_HAS_extrl_i64_i32    1
++#define TCG_TARGET_HAS_extrh_i64_i32    1
++
++// Negations.
+ #define TCG_TARGET_HAS_neg_i32          1
+ #define TCG_TARGET_HAS_not_i32          1
+ #define TCG_TARGET_HAS_neg_i64          1
+ #define TCG_TARGET_HAS_not_i64          1
+ 
++// Logicals.
+ #define TCG_TARGET_HAS_andc_i32         1
+ #define TCG_TARGET_HAS_orc_i32          1
+ #define TCG_TARGET_HAS_eqv_i32          1
++#define TCG_TARGET_HAS_rot_i32          1
++#define TCG_TARGET_HAS_nand_i32         1
++#define TCG_TARGET_HAS_nor_i32          1
+ #define TCG_TARGET_HAS_andc_i64         1
+ #define TCG_TARGET_HAS_eqv_i64          1
+ #define TCG_TARGET_HAS_orc_i64          1
++#define TCG_TARGET_HAS_rot_i64          1
++#define TCG_TARGET_HAS_nor_i64          1
++#define TCG_TARGET_HAS_nand_i64         1
+ 
+-// We don't currently support rotates, since AArch64 lacks ROL.
+-// We'll fix this later.
+-#define TCG_TARGET_HAS_rot_i32          0
+-#define TCG_TARGET_HAS_rot_i64          0
++// Bitwise operations.
++#define TCG_TARGET_HAS_clz_i32          1
++#define TCG_TARGET_HAS_ctz_i32          1
++#define TCG_TARGET_HAS_clz_i64          1
++#define TCG_TARGET_HAS_ctz_i64          1
+ 
+ // Swaps.
+ #define TCG_TARGET_HAS_bswap16_i32      1
+@@ -104,53 +119,58 @@
+ #define TCG_TARGET_HAS_bswap64_i64      1
+ #define TCG_TARGET_HAS_MEMORY_BSWAP     1
+ 
+-// Specify we'll handle direct jumps.
+-#define TCG_TARGET_HAS_direct_jump 1
+-
+ //
+-// Potential TODOs.
++// Supported optional vector instructions.
+ //
+ 
+-// TODO: implement DEPOSIT as BFI.
+-#define TCG_TARGET_HAS_deposit_i32      0
+-#define TCG_TARGET_HAS_deposit_i64      0
+-
+-// TODO: implement EXTRACT as BFX.
+-#define TCG_TARGET_HAS_extract_i32      0
+-#define TCG_TARGET_HAS_sextract_i32     0
+-#define TCG_TARGET_HAS_extract_i64      0
+-#define TCG_TARGET_HAS_sextract_i64     0
+-
+-// TODO: it might be worth writing a gadget for this
+-#define TCG_TARGET_HAS_movcond_i32      0
+-#define TCG_TARGET_HAS_movcond_i64      0
++#define TCG_TARGET_HAS_v64              1
++#define TCG_TARGET_HAS_v128             1
++#define TCG_TARGET_HAS_v256             0
++
++#define TCG_TARGET_HAS_andc_vec         1
++#define TCG_TARGET_HAS_orc_vec          1
++#define TCG_TARGET_HAS_nand_vec         0
++#define TCG_TARGET_HAS_nor_vec          0
++#define TCG_TARGET_HAS_eqv_vec          0
++#define TCG_TARGET_HAS_not_vec          1
++#define TCG_TARGET_HAS_neg_vec          1
++#define TCG_TARGET_HAS_abs_vec          1
++#define TCG_TARGET_HAS_roti_vec         0
++#define TCG_TARGET_HAS_rots_vec         0
++#define TCG_TARGET_HAS_rotv_vec         0
++#define TCG_TARGET_HAS_shi_vec          0
++#define TCG_TARGET_HAS_shs_vec          0
++#define TCG_TARGET_HAS_shv_vec          1
++#define TCG_TARGET_HAS_mul_vec          1
++#define TCG_TARGET_HAS_sat_vec          1
++#define TCG_TARGET_HAS_minmax_vec       1
++#define TCG_TARGET_HAS_bitsel_vec       1
++#define TCG_TARGET_HAS_cmpsel_vec       0
+ 
|
|
|
+ //
|
|
|
+ // Unsupported instructions.
|
|
|
+ //
|
|
|
+
|
|
|
+-// ARMv8 doesn't have instructions for NAND/NOR.
|
|
|
+-#define TCG_TARGET_HAS_nand_i32 0
|
|
|
+-#define TCG_TARGET_HAS_nor_i32 0
|
|
|
+-#define TCG_TARGET_HAS_nor_i64 0
|
|
|
+-#define TCG_TARGET_HAS_nand_i64 0
|
|
|
+-
|
|
|
+-// aarch64's CLZ is implemented without a condition, so it
|
|
|
+-#define TCG_TARGET_HAS_clz_i32 0
|
|
|
+-#define TCG_TARGET_HAS_ctz_i32 0
|
|
|
++// There's no direct instruction with which to count the number of ones,
|
|
|
++// so we'll leave this to be synthesized from other instructions.
|
|
|
+ #define TCG_TARGET_HAS_ctpop_i32 0
|
|
|
+-#define TCG_TARGET_HAS_clz_i64 0
|
|
|
+-#define TCG_TARGET_HAS_ctz_i64 0
|
|
|
+ #define TCG_TARGET_HAS_ctpop_i64 0
|
|
|
+
|
|
|
+-// We don't have a simple gadget for this, since we're always assuming softmmu.
|
|
|
+-#define TCG_TARGET_HAS_qemu_st8_i32 0
|
|
|
+-
|
|
|
+-// No AArch64 equivalent.a
|
|
|
+-#define TCG_TARGET_HAS_extrl_i64_i32 0
|
|
|
+-#define TCG_TARGET_HAS_extrh_i64_i32 0
|
|
|
++// We don't currently support gadgets with more than three arguments,
|
|
|
++// so we can't yet create movcond, deposit, or extract gadgets.
|
|
|
++#define TCG_TARGET_HAS_movcond_i32 0
|
|
|
++#define TCG_TARGET_HAS_movcond_i64 0
|
|
|
++#define TCG_TARGET_HAS_deposit_i32 0
|
|
|
++#define TCG_TARGET_HAS_deposit_i64 0
|
|
|
++#define TCG_TARGET_HAS_extract_i32 0
|
|
|
++#define TCG_TARGET_HAS_sextract_i32 0
|
|
|
++#define TCG_TARGET_HAS_extract_i64 0
|
|
|
++#define TCG_TARGET_HAS_sextract_i64 0
|
|
|
+
|
|
|
+-#define TCG_TARGET_HAS_extract2_i64 0
|
|
|
++// This operation exists specifically to allow us to provide differing register
|
|
|
++// constraints for 8-bit loads and stores. We don't need to do so, so we'll leave
|
|
|
++// this unimplemented, as we gain nothing by it.
|
|
|
++#define TCG_TARGET_HAS_qemu_st8_i32 0
|
|
|
+
|
|
|
+ // These should always be zero on our 64B platform.
|
|
|
+ #define TCG_TARGET_HAS_muls2_i64 0
|
|
|
+@@ -166,36 +186,55 @@
|
|
|
+ #define TCG_TARGET_HAS_muls2_i32 0
|
|
|
+ #define TCG_TARGET_HAS_muluh_i32 0
|
|
|
+ #define TCG_TARGET_HAS_mulsh_i32 0
|
|
|
++#define TCG_TARGET_HAS_extract2_i64 0
|
|
|
+
|
|
|
+ //
|
|
|
+ // Platform metadata.
|
|
|
+ //
|
|
|
+
|
|
|
+ // Number of registers available.
|
|
|
+-// It might make sense to up these, since we can also use x16 -> x25?
|
|
|
+-#define TCG_TARGET_NB_REGS 16
|
|
|
++#define TCG_TARGET_NB_REGS 64
|
|
|
++
|
|
|
++// Number of general purpose registers.
|
|
|
++#define TCG_TARGET_GP_REGS 16
|
|
|
+
|
|
|
+ /* List of registers which are used by TCG. */
|
|
|
+ typedef enum {
|
|
|
+- TCG_REG_R0 = 0,
|
|
|
+- TCG_REG_R1,
|
|
|
+- TCG_REG_R2,
|
|
|
+- TCG_REG_R3,
|
|
|
+- TCG_REG_R4,
|
|
|
+- TCG_REG_R5,
|
|
|
+- TCG_REG_R6,
|
|
|
+- TCG_REG_R7,
|
|
|
+- TCG_REG_R8,
|
|
|
+- TCG_REG_R9,
|
|
|
+- TCG_REG_R10,
|
|
|
+- TCG_REG_R11,
|
|
|
+- TCG_REG_R12,
|
|
|
+- TCG_REG_R13,
|
|
|
+- TCG_REG_R14,
|
|
|
+- TCG_REG_R15,
|
|
|
+-
|
|
|
+- TCG_AREG0 = TCG_REG_R14,
|
|
|
+- TCG_REG_CALL_STACK = TCG_REG_R15,
|
|
|
++
|
|
|
++ // General purpose registers.
|
|
|
++ // Note that we name every _host_ register here, but don't
++ // necessarily use them all; that's determined by the allocation order
++ // and the register-count settings above. These just give us the ability
++ // to refer to each register by name.
|
|
|
++ TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
|
|
|
++ TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, TCG_REG_R7,
|
|
|
++ TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
|
|
|
++ TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
|
|
|
++ TCG_REG_R16, TCG_REG_R17, TCG_REG_R18, TCG_REG_R19,
|
|
|
++ TCG_REG_R20, TCG_REG_R21, TCG_REG_R22, TCG_REG_R23,
|
|
|
++ TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
|
|
|
++ TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
|
|
|
++
|
|
|
++ // Register aliases.
|
|
|
++ TCG_AREG0 = TCG_REG_R14,
|
|
|
++ TCG_REG_CALL_STACK = TCG_REG_R15,
|
|
|
++
|
|
|
++ // Mask that refers to the GP registers.
|
|
|
++ TCG_MASK_GP_REGISTERS = 0xFFFFul,
|
|
|
++
|
|
|
++ // Vector registers.
|
|
|
++ TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
|
|
|
++ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
|
|
|
++ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
|
|
|
++ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
|
|
|
++ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
|
|
|
++ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
|
|
|
++ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
|
|
|
++ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
|
|
|
++
|
|
|
++ // Mask that refers to the vector registers.
|
|
|
++ TCG_MASK_VECTOR_REGISTERS = 0xFFFF000000000000ul,
|
|
|
++
|
|
|
+ } TCGReg;
|
|
|
+
|
|
|
+ // Specify the shape of the stack our runtime will use.
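(The register masks above are worth a second look: the GP mask covers bits 0-15 of the 64-entry register file, while the vector mask covers bits 48-63, i.e. only v16-v31, matching the gadget generator's "v{number + 16}" substitution. A quick self-contained check; values copied from the enum, interpretation assumed:)

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uint64_t gp_mask  = 0xFFFFull;             /* TCG_MASK_GP_REGISTERS     */
        const uint64_t vec_mask = 0xFFFF000000000000ull; /* TCG_MASK_VECTOR_REGISTERS */

        assert((gp_mask & vec_mask) == 0);       /* the two classes are disjoint */
        assert(gp_mask  == (1ull << 16) - 1);    /* registers 0..15  (x0-x15)    */
        assert(vec_mask == 0xFFFFull << 48);     /* registers 48..63 (v16-v31)   */
        return 0;
    }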
|
|
|
+diff --git a/tcg/aarch64-tcti/tcg-target.opc.h b/tcg/aarch64-tcti/tcg-target.opc.h
|
|
|
+new file mode 100644
|
|
|
+index 0000000000..26bfd9c460
|
|
|
+--- /dev/null
|
|
|
++++ b/tcg/aarch64-tcti/tcg-target.opc.h
|
|
|
+@@ -0,0 +1,14 @@
|
|
|
++/*
|
|
|
++ * Copyright (c) 2019 Linaro
|
|
|
++ *
|
|
|
++ * This work is licensed under the terms of the GNU GPL, version 2 or
|
|
|
++ * (at your option) any later version.
|
|
|
++ *
|
|
|
++ * See the COPYING file in the top-level directory for details.
|
|
|
++ *
|
|
|
++ * Target-specific opcodes for host vector expansion. These will be
|
|
|
++ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
|
|
|
++ * consider these to be UNSPEC with names.
|
|
|
++ */
|
|
|
++
|
|
|
++DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)
|
|
|
+diff --git a/tcg/aarch64-tcti/tcti-gadget-gen.py b/tcg/aarch64-tcti/tcti-gadget-gen.py
|
|
|
+index fa0232fefa..4e127ff8c3 100755
|
|
|
+--- a/tcg/aarch64-tcti/tcti-gadget-gen.py
|
|
|
++++ b/tcg/aarch64-tcti/tcti-gadget-gen.py
|
|
|
+@@ -4,17 +4,10 @@
|
|
|
+ Generates a C-code include file containing 'gadgets' for use by TCTI.
|
|
|
+ """
|
|
|
+
|
|
|
++import os
|
|
|
+ import sys
|
|
|
+ import itertools
|
|
|
+
|
|
|
+-# Get a handle on the file we'll be working with, and redirect print to it.
|
|
|
+-if len(sys.argv) > 1:
|
|
|
+- out_file = open(sys.argv[1], "w")
|
|
|
+-
|
|
|
+- # Hook our print function, so it always outputs to the relevant file.
|
|
|
+- core_print = print
|
|
|
+- print = lambda *a, **k : core_print(*a, **k, file=out_file)
|
|
|
+-
|
|
|
+ # Epilogue code follows at the end of each gadget, and handles continuing execution.
|
|
|
+ EPILOGUE = (
|
|
|
+ # Load our next gadget address from our bytecode stream, advancing it.
|
|
|
+@@ -32,41 +25,113 @@
|
|
|
+ # Helper that provides each of the AArch64 condition codes of interest.
|
|
|
+ ARCH_CONDITION_CODES = ["eq", "ne", "lt", "ge", "le", "gt", "lo", "hs", "ls", "hi"]
|
|
|
+
|
|
|
++# The list of vector size codes supported on this platform.
|
|
|
++VECTOR_SIZES = ['16b', '8b', '4h', '8h', '2s', '4s', '2d']
|
|
|
++
|
|
|
+ # We'll create a variety of gadgets that assume the MMU's TLB is stored at certain
|
|
|
+ # offsets into its structure. These should match the offsets in tcg-target.c.in.
|
|
|
+-QEMU_ALLOWED_MMU_OFFSETS = [ 64, 96, 128 ]
|
|
|
++QEMU_ALLOWED_MMU_OFFSETS = [ 32, 48, 64, 96, 128 ]
|
|
|
+
|
|
|
+ # Statistics.
|
|
|
+ gadgets = 0
|
|
|
+ instructions = 0
|
|
|
+
|
|
|
+-def simple(name, *lines):
|
|
|
++# Files to write to.
|
|
|
++current_collection = "basic"
|
|
|
++output_files = {}
|
|
|
++
|
|
|
++# Create a top-level header.
|
|
|
++top_header = open("tcti_gadgets.h", "w")
|
|
|
++print("/* Automatically generated by tcti-gadget-gen.py. Do not edit. */\n", file=top_header)
|
|
|
++
|
|
|
++def _get_output_files():
|
|
|
++ """ Gathers the output C and H files for a given gadget-cluster name. """
|
|
|
++
|
|
|
++ # If we don't have an output file for this already, create it.
|
|
|
++ return output_files[current_collection]
|
|
|
++
|
|
|
++
|
|
|
++def START_COLLECTION(name):
|
|
|
++ """ Sets the name of the current collection. """
|
|
|
++
|
|
|
++ global current_collection
|
|
|
++
|
|
|
++ # If we already have a collection for this, just make it active again.
++ if name in output_files:
++     current_collection = name
++     return
|
|
|
++
|
|
|
++ # Create the relevant output files
|
|
|
++ new_c_file = open(f"tcti_{name}_gadgets.c", "w")
|
|
|
++ new_h_file = open(f"tcti_{name}_gadgets.h", "w")
|
|
|
++ output_files[name] = (new_c_file, new_h_file)
|
|
|
++
|
|
|
++ # Add the file to our gadget collection.
|
|
|
++ print(f'#include "tcti_{name}_gadgets.h"', file=top_header)
|
|
|
++
|
|
|
++ # Add generated messages to the relevant collection.
|
|
|
++ print("/* Automatically generated by tcti-gadget-gen.py. Do not edit. */\n", file=new_c_file)
|
|
|
++ print("/* Automatically generated by tcti-gadget-gen.py. Do not edit. */\n", file=new_h_file)
|
|
|
++
|
|
|
++ # Start our C file with inclusion of the relevant header.
|
|
|
++ print(f'\n#include "tcti_{name}_gadgets.h"\n', file=new_c_file)
|
|
|
++
|
|
|
++ # Start our H file with a simple pragma-guard, for speed.
|
|
|
++ print('\n#pragma once\n', file=new_h_file)
|
|
|
++
|
|
|
++ # Finally, set the global active collection.
|
|
|
++ current_collection = name
|
|
|
++
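(The net effect is one small translation unit per collection plus a single umbrella header; the generated tcti_gadgets.h ends up looking roughly like this, with one include per START_COLLECTION() call made below:)

    /* tcti_gadgets.h, as generated (illustrative excerpt) */
    #include "tcti_misc_gadgets.h"
    #include "tcti_setcond_gadgets.h"
    #include "tcti_brcond_gadgets.h"
    #include "tcti_mov_gadgets.h"
    /* ... */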
|
|
|
++
|
|
|
++def simple(name, *lines, export=True):
|
|
|
+ """ Generates a simple gadget that needs no per-register specialization. """
|
|
|
+
|
|
|
+ global gadgets, instructions
|
|
|
+
|
|
|
+ gadgets += 1
|
|
|
+
|
|
|
++ # Fetch the files we'll be using for output.
|
|
|
++ c_file, h_file = _get_output_files()
|
|
|
++
|
|
|
+ # Create our C/ASM framing.
|
|
|
+- #print(f"__attribute__((naked)) static void gadget_{name}(void)")
|
|
|
+- print(f"__attribute__((naked)) static void gadget_{name}(void);")
|
|
|
+- print(f"__attribute__((naked)) static void gadget_{name}(void)")
|
|
|
+- print("{")
|
|
|
++ if export:
|
|
|
++ print(f"__attribute__((naked)) void gadget_{name}(void);", file=h_file)
|
|
|
++ print(f"__attribute__((naked)) void gadget_{name}(void)", file=c_file)
|
|
|
++ else:
|
|
|
++ print(f"static __attribute__((naked)) void gadget_{name}(void)", file=c_file)
|
|
|
++
|
|
|
++ print("{", file=c_file)
|
|
|
+
|
|
|
+ # Add the core gadget
|
|
|
+- print("\tasm(")
|
|
|
++ print("\tasm(", file=c_file)
|
|
|
+ for line in lines + EPILOGUE:
|
|
|
+- print(f"\t\t\"{line} \\n\"")
|
|
|
++ print(f"\t\t\"{line} \\n\"", file=c_file)
|
|
|
+ instructions += 1
|
|
|
+- print("\t);")
|
|
|
++ print("\t);", file=c_file)
|
|
|
+
|
|
|
+ # End our framing.
|
|
|
+- print("}\n")
|
|
|
++ print("}\n", file=c_file)
|
|
|
++
|
|
|
+
|
|
|
+
|
|
|
+ def with_register_substitutions(name, substitutions, *lines, immediate_range=range(0)):
|
|
|
+ """ Generates a collection of gadgtes with register substitutions. """
|
|
|
+
|
|
|
++ def _expand_op1_immediate(num):
|
|
|
++ """ Gets a uncompressed bitfield argument for a given immediate; for NEON instructions.
|
|
|
++
|
|
|
++ Duplciates each bit eight times; converting 0b0100 to 0x00FF0000.
|
|
|
++ """
|
|
|
++
|
|
|
++ # Get the number as a binary string...
|
|
|
++ binstring = bin(num)[2:]
|
|
|
++
|
|
|
++ # ... expand out the values to hex...
|
|
|
++ hex_string = binstring.replace('1', 'FF').replace('0', '00')
|
|
|
++
|
|
|
++ # ... and return out the new constant.
|
|
|
++ return f"0x{hex_string}"
|
|
|
++
|
|
|
++
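(The same expansion, sketched in C for reference: each of the eight immediate bits widens into one 0x00 or 0xFF byte, the form that NEON's byte-mask MOVI encoding expects. This is a standalone illustration, not the script's code.)

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t expand_imm8(uint8_t imm)
    {
        uint64_t out = 0;
        for (int bit = 0; bit < 8; bit++) {
            if (imm & (1u << bit)) {
                out |= 0xFFull << (bit * 8);   /* one full byte per set bit */
            }
        }
        return out;
    }

    int main(void)
    {
        /* 0b0100 -> 0x0000000000FF0000, as in the docstring above */
        printf("0x%016llX\n", (unsigned long long)expand_imm8(0x04));
        return 0;
    }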
|
|
|
+ def substitutions_for_letter(letter, number, line):
|
|
|
+ """ Helper that transforms Wd => w1, implementing gadget substitutions. """
|
|
|
+
|
|
|
+@@ -74,8 +139,16 @@ def substitutions_for_letter(letter, number, line):
|
|
|
+ line = line.replace(f"X{letter}", f"x{number}")
|
|
|
+ line = line.replace(f"W{letter}", f"w{number}")
|
|
|
+
|
|
|
+- # ... immediate substitutions.
|
|
|
++ # ... vector register substitutions...
|
|
|
++ line = line.replace(f"V{letter}", f"v{number + 16}")
|
|
|
++ line = line.replace(f"D{letter}", f"d{number + 16}")
|
|
|
++ line = line.replace(f"Q{letter}", f"q{number + 16}")
|
|
|
++
|
|
|
++ # ... regular immediate substitutions...
|
|
|
+ line = line.replace(f"I{letter}", f"{number}")
|
|
|
++
|
|
|
++ # ... and compressed immediate substitutions.
|
|
|
++ line = line.replace(f"S{letter}", f"{_expand_op1_immediate(number)}")
|
|
|
+ return line
|
|
|
+
|
|
|
+
|
|
|
+@@ -105,77 +178,94 @@ def substitutions_for_letter(letter, number, line):
|
|
|
+
|
|
|
+ # ... and emit the gadget.
|
|
|
+ permutation_id = "_arg".join(str(number) for number in permutation)
|
|
|
+- simple(f"{name}_arg{permutation_id}", *new_lines)
|
|
|
++ simple(f"{name}_arg{permutation_id}", *new_lines, export=False)
|
|
|
+
|
|
|
+
|
|
|
+ def with_dnm(name, *lines):
|
|
|
+ """ Generates a collection of gadgets with substitutions for Xd, Xn, and Xm, and equivalents. """
|
|
|
+ with_register_substitutions(name, ("d", "n", "m"), *lines)
|
|
|
+
|
|
|
++ # Fetch the files we'll be using for output.
|
|
|
++ c_file, h_file = _get_output_files()
|
|
|
++
|
|
|
++ # Print out an extern.
|
|
|
++ print(f"extern const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}];", file=h_file)
|
|
|
++
|
|
|
+ # Print out an array that contains all of our gadgets, for lookup.
|
|
|
+- print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}] = ", end="")
|
|
|
+- print("{")
|
|
|
++ print(f"const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}] = ", end="", file=c_file)
|
|
|
++ print("{", file=c_file)
|
|
|
+
|
|
|
+ # D array
|
|
|
+ for d in TCG_REGISTER_NUMBERS:
|
|
|
+- print("\t{")
|
|
|
++ print("\t{", file=c_file)
|
|
|
+
|
|
|
+ # N array
|
|
|
+ for n in TCG_REGISTER_NUMBERS:
|
|
|
+- print("\t\t{", end="")
|
|
|
++ print("\t\t{", end="", file=c_file)
|
|
|
+
|
|
|
+ # M array
|
|
|
+ for m in TCG_REGISTER_NUMBERS:
|
|
|
+- print(f"gadget_{name}_arg{d}_arg{n}_arg{m}", end=", ")
|
|
|
++ print(f"gadget_{name}_arg{d}_arg{n}_arg{m}", end=", ", file=c_file)
|
|
|
+
|
|
|
+- print("},")
|
|
|
+- print("\t},")
|
|
|
+- print("};")
|
|
|
++ print("},", file=c_file)
|
|
|
++ print("\t},", file=c_file)
|
|
|
++ print("};", file=c_file)
|
|
|
+
|
|
|
+
|
|
|
+ def with_dn_immediate(name, *lines, immediate_range):
|
|
|
+ """ Generates a collection of gadgets with substitutions for Xd, Xn, and Xm, and equivalents. """
|
|
|
+ with_register_substitutions(name, ["d", "n"], *lines, immediate_range=immediate_range)
|
|
|
+
|
|
|
++ # Fetch the files we'll be using for output.
|
|
|
++ c_file, h_file = _get_output_files()
|
|
|
++
|
|
|
++ # Print out an extern.
|
|
|
++ print(f"extern const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{len(immediate_range)}];", file=h_file)
|
|
|
++
|
|
|
+ # Print out an array that contains all of our gadgets, for lookup.
|
|
|
+- print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{len(immediate_range)}] = ", end="")
|
|
|
+- print("{")
|
|
|
++ print(f"const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}][{len(immediate_range)}] = ", end="", file=c_file)
|
|
|
++ print("{", file=c_file)
|
|
|
+
|
|
|
+ # D array
|
|
|
+ for d in TCG_REGISTER_NUMBERS:
|
|
|
+- print("\t{")
|
|
|
++ print("\t{", file=c_file)
|
|
|
+
|
|
|
+ # N array
|
|
|
+ for n in TCG_REGISTER_NUMBERS:
|
|
|
+- print("\t\t{", end="")
|
|
|
++ print("\t\t{", end="", file=c_file)
|
|
|
+
|
|
|
+ # M array
|
|
|
+ for i in immediate_range:
|
|
|
+- print(f"gadget_{name}_arg{d}_arg{n}_arg{i}", end=", ")
|
|
|
++ print(f"gadget_{name}_arg{d}_arg{n}_arg{i}", end=", ", file=c_file)
|
|
|
+
|
|
|
+- print("},")
|
|
|
+- print("\t},")
|
|
|
+- print("};")
|
|
|
++ print("},", file=c_file)
|
|
|
++ print("\t},", file=c_file)
|
|
|
++ print("};", file=c_file)
|
|
|
+
|
|
|
+
|
|
|
+ def with_pair(name, substitutions, *lines):
|
|
|
+ """ Generates a collection of gadgets with two subtstitutions."""
|
|
|
+ with_register_substitutions(name, substitutions, *lines)
|
|
|
+
|
|
|
++ # Fetch the files we'll be using for output.
|
|
|
++ c_file, h_file = _get_output_files()
|
|
|
++
|
|
|
++ print(f"extern const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}];", file=h_file)
|
|
|
++
|
|
|
+ # Print out an array that contains all of our gadgets, for lookup.
|
|
|
+- print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}] = ", end="")
|
|
|
+- print("{")
|
|
|
++ print(f"const void* gadget_{name}[{TCG_REGISTER_COUNT}][{TCG_REGISTER_COUNT}] = ", end="", file=c_file)
|
|
|
++ print("{", file=c_file)
|
|
|
+
|
|
|
+ # N array
|
|
|
+ for a in TCG_REGISTER_NUMBERS:
|
|
|
+- print("\t\t{", end="")
|
|
|
++ print("\t\t{", end="", file=c_file)
|
|
|
+
|
|
|
+ # M array
|
|
|
+ for b in TCG_REGISTER_NUMBERS:
|
|
|
+- print(f"gadget_{name}_arg{a}_arg{b}", end=", ")
|
|
|
++ print(f"gadget_{name}_arg{a}_arg{b}", end=", ", file=c_file)
|
|
|
+
|
|
|
+- print("},")
|
|
|
+- print("};")
|
|
|
++ print("},", file=c_file)
|
|
|
++ print("};", file=c_file)
|
|
|
+
|
|
|
+
|
|
|
+ def math_dnm(name, mnemonic):
|
|
|
+@@ -183,10 +273,10 @@ def math_dnm(name, mnemonic):
|
|
|
+ with_dnm(f'{name}_i32', f"{mnemonic} Wd, Wn, Wm")
|
|
|
+ with_dnm(f'{name}_i64', f"{mnemonic} Xd, Xn, Xm")
|
|
|
+
|
|
|
+-def math_dn(name, mnemonic):
|
|
|
++def math_dn(name, mnemonic, source_is_wn=False):
|
|
|
+ """ Equivalent to `with_dn`, but creates a _i32 and _i64 variant. For simple math. """
|
|
|
+ with_dn(f'{name}_i32', f"{mnemonic} Wd, Wn")
|
|
|
+- with_dn(f'{name}_i64', f"{mnemonic} Xd, Xn")
|
|
|
++ with_dn(f'{name}_i64', f"{mnemonic} Xd, Wn" if source_is_wn else f"{mnemonic} Xd, Xn")
|
|
|
+
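(The source_is_wn flag exists because AArch64's sxtb/sxth name a 32-bit source register even when the destination is 64-bit; only the low 8/16 bits matter. A sketch of the semantics, with the corresponding instructions noted:)

    #include <assert.h>
    #include <stdint.h>

    static int64_t ext8s_i64(uint64_t x)  { return (int8_t)x;  }  /* sxtb Xd, Wn */
    static int64_t ext16s_i64(uint64_t x) { return (int16_t)x; }  /* sxth Xd, Wn */

    int main(void)
    {
        assert(ext8s_i64(0xFFull) == -1);
        assert(ext16s_i64(0x8000ull) == -32768);
        return 0;
    }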
|
|
|
+
|
|
|
+ def with_nm(name, *lines):
|
|
|
+@@ -227,34 +317,44 @@ def with_single(name, substitution, *lines):
|
|
|
+ """ Generates a collection of gadgets with two subtstitutions."""
|
|
|
+ with_register_substitutions(name, (substitution,), *lines)
|
|
|
+
|
|
|
++ # Fetch the files we'll be using for output.
|
|
|
++ c_file, h_file = _get_output_files()
|
|
|
++
|
|
|
++ print(f"extern const void* gadget_{name}[{TCG_REGISTER_COUNT}];", file=h_file)
|
|
|
++
|
|
|
+ # Print out an array that contains all of our gadgets, for lookup.
|
|
|
+- print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}] = ", end="")
|
|
|
+- print("{")
|
|
|
++ print(f"const void* gadget_{name}[{TCG_REGISTER_COUNT}] = ", end="", file=c_file)
|
|
|
++ print("{", file=c_file)
|
|
|
+
|
|
|
+ for n in TCG_REGISTER_NUMBERS:
|
|
|
+- print(f"gadget_{name}_arg{n}", end=", ")
|
|
|
++ print(f"gadget_{name}_arg{n}", end=", ", file=c_file)
|
|
|
+
|
|
|
+- print("};")
|
|
|
++ print("};", file=c_file)
|
|
|
+
|
|
|
+
|
|
|
+ def with_d_immediate(name, *lines, immediate_range=range(0)):
|
|
|
+ """ Generates a collection of gadgets with two subtstitutions."""
|
|
|
+ with_register_substitutions(name, ['d'], *lines, immediate_range=immediate_range)
|
|
|
+
|
|
|
++ # Fetch the files we'll be using for output.
|
|
|
++ c_file, h_file = _get_output_files()
|
|
|
++
|
|
|
++ print(f"extern void* gadget_{name}[{TCG_REGISTER_COUNT}][{len(immediate_range)}];", file=h_file)
|
|
|
++
|
|
|
+ # Print out an array that contains all of our gadgets, for lookup.
|
|
|
+- print(f"static void* gadget_{name}[{TCG_REGISTER_COUNT}][{len(immediate_range)}] = ", end="")
|
|
|
+- print("{")
|
|
|
++ print(f"void* gadget_{name}[{TCG_REGISTER_COUNT}][{len(immediate_range)}] = ", end="", file=c_file)
|
|
|
++ print("{", file=c_file)
|
|
|
+
|
|
|
+ # D array
|
|
|
+ for a in TCG_REGISTER_NUMBERS:
|
|
|
+- print("\t\t{", end="")
|
|
|
++ print("\t\t{", end="", file=c_file)
|
|
|
+
|
|
|
+ # I array
|
|
|
+ for b in immediate_range:
|
|
|
+- print(f"gadget_{name}_arg{a}_arg{b}", end=", ")
|
|
|
++ print(f"gadget_{name}_arg{a}_arg{b}", end=", ", file=c_file)
|
|
|
+
|
|
|
+- print("},")
|
|
|
+- print("};")
|
|
|
++ print("},", file=c_file)
|
|
|
++ print("};", file=c_file)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+@@ -265,31 +365,14 @@ def with_d(name, *lines):
|
|
|
+
|
|
|
+ # Assembly code for saving our machine state before entering the C runtime.
|
|
|
+ C_CALL_PROLOGUE = [
|
|
|
+- # Store our machine state.
|
|
|
+- "str x25, [sp, #-16]!",
|
|
|
+ "stp x14, x15, [sp, #-16]!",
|
|
|
+- "stp x12, x13, [sp, #-16]!",
|
|
|
+- "stp x10, x11, [sp, #-16]!",
|
|
|
+- "stp x8, x9, [sp, #-16]!",
|
|
|
+- "stp x6, x7, [sp, #-16]!",
|
|
|
+- "stp x4, x5, [sp, #-16]!",
|
|
|
+- "stp x2, x3, [sp, #-16]!",
|
|
|
+- "stp x0, x1, [sp, #-16]!",
|
|
|
+ "stp x28, lr, [sp, #-16]!",
|
|
|
+ ]
|
|
|
+
|
|
|
+ # Assembly code for restoring our machine state after leaving the C runtime.
|
|
|
+ C_CALL_EPILOGUE = [
|
|
|
+- "ldp x28, lr, [sp], #16",
|
|
|
+- "ldp x0, x1, [sp], #16",
|
|
|
+- "ldp x2, x3, [sp], #16",
|
|
|
+- "ldp x4, x5, [sp], #16",
|
|
|
+- "ldp x6, x7, [sp], #16",
|
|
|
+- "ldp x8, x9, [sp], #16",
|
|
|
+- "ldp x10, x11, [sp], #16",
|
|
|
+- "ldp x12, x13, [sp], #16",
|
|
|
++ "ldp x28, lr, [sp], #16",
|
|
|
+ "ldp x14, x15, [sp], #16",
|
|
|
+- "ldr x25, [sp], #16",
|
|
|
+ ]
|
|
|
+
|
|
|
+
|
|
|
+@@ -503,11 +586,73 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ )
|
|
|
+
|
|
|
+
|
|
|
++
|
|
|
++def vector_dn(name, *lines):
|
|
|
++ """ Creates a set of gadgets for every size of a given vector op. Accepts 'S' as a size placeholder. """
|
|
|
++
|
|
|
++ def do_size_replacement(line, size):
|
|
|
++ line = line.replace(".S", f".{size}")
|
|
|
++
|
|
|
++ # If this size requires a 64b source register, replace Wn with Xn.
|
|
|
++ if size == "2d":
|
|
|
++ line = line.replace("Wn", "Xn")
|
|
|
++
|
|
|
++ return line
|
|
|
++
|
|
|
++
|
|
|
++ # Create a variant for each size, replacing any placeholders.
|
|
|
++ for size in VECTOR_SIZES:
|
|
|
++ sized_lines = (do_size_replacement(line, size) for line in lines)
|
|
|
++ with_dn(f"{name}_{size}", *sized_lines)
|
|
|
++
|
|
|
++
|
|
|
++def vector_dnm(name, *lines, scalar=None, omit_sizes=()):
|
|
|
++ """ Creates a set of gadgets for every size of a given vector op. Accepts 'S' as a size placeholder. """
|
|
|
++
|
|
|
++ def do_size_replacement(line, size):
|
|
|
++ return line.replace(".S", f".{size}")
|
|
|
++
|
|
|
++ # Create a variant for each size, replacing any placeholders.
|
|
|
++ for size in VECTOR_SIZES:
|
|
|
++ if size in omit_sizes:
|
|
|
++ continue
|
|
|
++
|
|
|
++ sized_lines = (do_size_replacement(line, size) for line in lines)
|
|
|
++ with_dnm(f"{name}_{size}", *sized_lines)
|
|
|
++
|
|
|
++ if scalar:
|
|
|
++ if isinstance(scalar, str):
|
|
|
++ sized_lines = (scalar,)
|
|
|
++ with_dnm(f"{name}_scalar", *sized_lines)
|
|
|
++
|
|
|
++
|
|
|
++def vector_math_dnm(name, operation):
|
|
|
++ """ Generates a collection of gadgets for vector math instructions. """
|
|
|
++ vector_dnm(name, f"{operation} Vd.S, Vn.S, Vm.S", scalar=f"{operation} Dd, Dn, Dm")
|
|
|
++
|
|
|
++
|
|
|
++def vector_math_dnm_no64(name, operation):
|
|
|
++ """ Generates a collection of gadgets for vector math instructions. """
|
|
|
++ vector_dnm(name, f"{operation} Vd.S, Vn.S, Vm.S", omit_sizes=('2d',))
|
|
|
++
|
|
|
++
|
|
|
++def vector_logic_dn(name, operation):
|
|
|
++ """ Generates a pair of gadgets for vector bitwise logic instructions. """
|
|
|
++ with_dn(f"{name}_d", f"{operation} Vd.8b, Vn.8b")
|
|
|
++ with_dn(f"{name}_q", f"{operation} Vd.16b, Vn.16b")
|
|
|
++
|
|
|
++
|
|
|
++def vector_logic_dnm(name, operation):
|
|
|
++ """ Generates a pair of gadgets for vector bitwise logic instructions. """
|
|
|
++ with_dnm(f"{name}_d", f"{operation} Vd.8b, Vn.8b, Vm.8b")
|
|
|
++ with_dnm(f"{name}_q", f"{operation} Vd.16b, Vn.16b, Vm.16b")
|
|
|
++
|
|
|
++
|
|
|
+ #
|
|
|
+ # Gadget definitions.
|
|
|
+ #
|
|
|
+
|
|
|
+-print("/* Automatically generated by tcti-gadget-gen.py. Do not edit. */\n")
|
|
|
++START_COLLECTION("misc")
|
|
|
+
|
|
|
+ # Call a C language helper function by address.
|
|
|
+ simple("call",
|
|
|
+@@ -539,6 +684,7 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ "ldr x28, [x28]"
|
|
|
+ )
|
|
|
+
|
|
|
++
|
|
|
+ # Exit from a translation buffer execution.
|
|
|
+ simple("exit_tb",
|
|
|
+
|
|
|
+@@ -550,9 +696,18 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ "ret"
|
|
|
+ )
|
|
|
+
|
|
|
++# Memory barriers.
|
|
|
++simple("mb_all", "dmb ish")
|
|
|
++simple("mb_st", "dmb ishst")
|
|
|
++simple("mb_ld", "dmb ishld")
|
|
|
++
|
|
|
++
|
|
|
++
|
|
|
+
|
|
|
+ for condition in ARCH_CONDITION_CODES:
|
|
|
+
|
|
|
++ START_COLLECTION("setcond")
|
|
|
++
|
|
|
+ # Performs a comparison between two operands.
|
|
|
+ with_dnm(f"setcond_i32_{condition}",
|
|
|
+ "subs Wd, Wn, Wm",
|
|
|
+@@ -573,23 +728,20 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ # branch is funneled throught the same address.
|
|
|
+ #
|
|
|
+
|
|
|
++ START_COLLECTION("brcond")
|
|
|
++
|
|
|
+ # Branches iff a given comparison is true.
|
|
|
+ with_dnm(f'brcond_i32_{condition}',
|
|
|
+
|
|
|
+ # Grab our immediate argument.
|
|
|
+ "ldr x27, [x28], #8",
|
|
|
+
|
|
|
+- # Perform our comparison and conditional branch.
|
|
|
+- "subs Wzr, Wn, Wm",
|
|
|
+- f"b{condition} 1f",
|
|
|
+-
|
|
|
+- "0:", # not taken
|
|
|
+- # Perform our end-of-instruction epilogue.
|
|
|
+- *EPILOGUE,
|
|
|
++ # Perform our comparison...
|
|
|
++ "subs wzr, Wn, Wm",
|
|
|
+
|
|
|
+- "1:" # taken
|
|
|
+- # Update our bytecode pointer to take the label.
|
|
|
+- "mov x28, x27"
|
|
|
++ # ... and our conditional branch, which selectively sets x28 (our "gadget pointer")
|
|
|
++ # to the new location, if required.
|
|
|
++ f"csel x28, x27, x28, {condition}"
|
|
|
+ )
|
|
|
+
|
|
|
+ # Branches iff a given comparison is true.
|
|
|
+@@ -599,19 +751,17 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ "ldr x27, [x28], #8",
|
|
|
+
|
|
|
+ # Perform our comparison...
|
|
|
+- "subs Xzr, Xn, Xm",
|
|
|
+- f"b{condition} 1f",
|
|
|
++ "subs xzr, Xn, Xm",
|
|
|
+
|
|
|
+- "0:", # not taken
|
|
|
+- # Perform our end-of-instruction epilogue.
|
|
|
+- *EPILOGUE,
|
|
|
+-
|
|
|
+- "1:" # taken
|
|
|
+- # Update our bytecode pointer to take the label.
|
|
|
+- "mov x28, x27"
|
|
|
++ # ... and our conditional branch, which selectively sets x28 (our "gadget pointer")
|
|
|
++ # to the new location, if required.
|
|
|
++ f"csel x28, x27, x28, {condition}"
|
|
|
+ )
|
|
|
+
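(The csel rewrite above replaces a taken/not-taken branch with a single select on the bytecode pointer: both outcomes fall through to the shared epilogue, and only the pointer the epilogue loads from changes. An illustrative model, not actual runtime code:)

    #include <assert.h>
    #include <stdint.h>

    /* pc models x28 (bytecode pointer), target models x27 (branch target). */
    static uintptr_t select_next(uintptr_t pc, uintptr_t target, int taken)
    {
        return taken ? target : pc;   /* csel x28, x27, x28, <cc> */
    }

    int main(void)
    {
        assert(select_next(0x1000, 0x2000, 0) == 0x1000);
        assert(select_next(0x1000, 0x2000, 1) == 0x2000);
        return 0;
    }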
|
|
|
+
|
|
|
++START_COLLECTION("mov")
|
|
|
++
|
|
|
++
|
|
|
+ # MOV variants.
|
|
|
+ with_dn("mov_i32", "mov Wd, Wn")
|
|
|
+ with_dn("mov_i64", "mov Xd, Xn")
|
|
|
+@@ -623,17 +773,24 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ with_d_immediate("movi_imm_i32", "mov Wd, #Ii", immediate_range=range(64))
|
|
|
+ with_d_immediate("movi_imm_i64", "mov Xd, #Ii", immediate_range=range(64))
|
|
|
+
|
|
|
++START_COLLECTION("load_unsigned")
|
|
|
++
|
|
|
+ # LOAD variants.
|
|
|
+ # TODO: should the signed variants have X variants for _i64?
|
|
|
+ ldst_dn("ld8u", "ldrb Wd, [Xn, x27]")
|
|
|
++ldst_dn("ld16u", "ldrh Wd, [Xn, x27]")
|
|
|
++ldst_dn("ld32u", "ldr Wd, [Xn, x27]")
|
|
|
++ldst_dn("ld_i64", "ldr Xd, [Xn, x27]")
|
|
|
++
|
|
|
++START_COLLECTION("load_signed")
|
|
|
++
|
|
|
+ ldst_dn("ld8s_i32", "ldrsb Wd, [Xn, x27]")
|
|
|
+ ldst_dn("ld8s_i64", "ldrsb Xd, [Xn, x27]")
|
|
|
+-ldst_dn("ld16u", "ldrh Wd, [Xn, x27]")
|
|
|
+ ldst_dn("ld16s_i32", "ldrsh Wd, [Xn, x27]")
|
|
|
+ ldst_dn("ld16s_i64", "ldrsh Xd, [Xn, x27]")
|
|
|
+-ldst_dn("ld32u", "ldr Wd, [Xn, x27]")
|
|
|
+ ldst_dn("ld32s_i64", "ldrsw Xd, [Xn, x27]")
|
|
|
+-ldst_dn("ld_i64", "ldr Xd, [Xn, x27]")
|
|
|
++
|
|
|
++START_COLLECTION("store")
|
|
|
+
|
|
|
+ # STORE variants.
|
|
|
+ ldst_dn("st8", "strb Wd, [Xn, x27]")
|
|
|
+@@ -644,6 +801,8 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ # QEMU LD/ST are handled in our C runtime rather than with simple gadgets,
|
|
|
+ # as they're nontrivial.
|
|
|
+
|
|
|
++START_COLLECTION("arithmetic")
|
|
|
++
|
|
|
+ # Trivial arithmetic.
|
|
|
+ math_dnm("add" , "add" )
|
|
|
+ math_dnm("sub" , "sub" )
|
|
|
+@@ -657,6 +816,8 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ with_dnm("remu_i32", "udiv w27, Wn, Wm", "msub Wd, w27, Wm, Wn")
|
|
|
+ with_dnm("remu_i64", "udiv x27, Xn, Xm", "msub Xd, x27, Xm, Xn")
|
|
|
+
|
|
|
++START_COLLECTION("logical")
|
|
|
++
|
|
|
+ # Trivial logical.
|
|
|
+ math_dn( "not", "mvn")
|
|
|
+ math_dn( "neg", "neg")
|
|
|
+@@ -669,71 +830,155 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ math_dnm("shl", "lsl")
|
|
|
+ math_dnm("shr", "lsr")
|
|
|
+ math_dnm("sar", "asr")
|
|
|
++math_dnm("rotr", "ror")
|
|
|
+
|
|
|
+ # AArch64 lacks a rotate-left instruction, so we instead rotate right by the negated amount.
|
|
|
+-# TODO: validate this?
|
|
|
+-#math_dnm("rotr", "ror")
|
|
|
+-#with_dnm("rotl_i32", "neg w27, Wm", "ror Wd, Wn, w27")
|
|
|
+-#with_dnm("rotl_i64", "neg x27, Xm", "ror Xd, Xn, x27")
|
|
|
++with_dnm("rotl_i32", "neg w27, Wm", "ror Wd, Wn, w27")
|
|
|
++with_dnm("rotl_i64", "neg w27, Wm", "ror Xd, Xn, x27")
|
|
|
++
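(The negate-then-ror trick works because rotate amounts only matter modulo the operand width, so -m and width-m select the same rotation; this is also why the 32-bit neg suffices for the 64-bit gadget, as the low six bits of the negated amount come out the same. A sketch:)

    #include <assert.h>
    #include <stdint.h>

    static uint32_t rotr32(uint32_t x, unsigned n)
    {
        n &= 31;
        return n ? (x >> n) | (x << (32 - n)) : x;
    }

    static uint32_t rotl32(uint32_t x, unsigned n)
    {
        /* neg w27, Wm ; ror Wd, Wn, w27 */
        return rotr32(x, (unsigned)(-(int)n) & 31);
    }

    int main(void)
    {
        assert(rotl32(0x80000001u, 4) == 0x00000018u);
        return 0;
    }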
|
|
|
++# We'll synthesize several instructions that don't exist; since it's still faster
|
|
|
++# to run these as gadgets.
|
|
|
++with_dnm("nand_i32", "and Wd, Wn, Wm", "mvn Wd, Wd")
|
|
|
++with_dnm("nand_i64", "and Xd, Xn, Xm", "mvn Xd, Xd")
|
|
|
++with_dnm("nor_i32", "orr Wd, Wn, Wm", "mvn Wd, Wd")
|
|
|
++with_dnm("nor_i64", "orr Xd, Xn, Xm", "mvn Xd, Xd")
|
|
|
++
|
|
|
++START_COLLECTION("bitwise")
|
|
|
++
|
|
|
++# Count leading zeroes, with a twist: QEMU requires us to provide
|
|
|
++# a default value for when the argument is 0.
|
|
|
++with_dnm("clz_i32",
|
|
|
++
|
|
|
++ # Perform the core CLZ into w26.
|
|
|
++ "clz w26, Wn",
|
|
|
++
|
|
|
++ # Check Wn to see if it was zero
|
|
|
++ "tst Wn, Wn",
|
|
|
++
|
|
|
++ # If it was zero, accept the argument provided in Wm.
|
|
|
++ # Otherwise, accept our result from w26.
|
|
|
++ "csel Wd, Wm, w26, eq"
|
|
|
++)
|
|
|
++with_dnm("clz_i64",
|
|
|
++
|
|
|
++ # Perform the core CLZ into x26.
++ "clz x26, Xn",
++
++ # Check Xn to see if it was zero.
++ "tst Xn, Xn",
++
++ # If it was zero, accept the argument provided in Xm.
++ # Otherwise, accept our result from x26.
|
|
|
++ "csel Xd, Xm, x26, eq"
|
|
|
++)
|
|
|
++
|
|
|
++
|
|
|
++# Count trailing zeroes, with a twist: QEMU requires us to provide
|
|
|
++# a default value for when the argument is 0.
|
|
|
++with_dnm("ctz_i32",
|
|
|
++ # Reverse our bits before performing our actual clz.
|
|
|
++ "rbit w26, Wn",
|
|
|
++ "clz w26, w26",
|
|
|
++
|
|
|
++ # Check Wn to see if it was zero
|
|
|
++ "tst Wn, Wn",
|
|
|
++
|
|
|
++ # If it was zero, accept the argument provided in Wm.
|
|
|
++ # Otherwise, accept our result from w26.
|
|
|
++ "csel Wd, Wm, w26, eq"
|
|
|
++)
|
|
|
++with_dnm("ctz_i64",
|
|
|
++
|
|
|
++ # Reverse our bits before performing our actual clz.
++ "rbit x26, Xn",
++ "clz x26, x26",
++
++ # Check Xn to see if it was zero.
++ "tst Xn, Xn",
++
++ # If it was zero, accept the argument provided in Xm.
++ # Otherwise, accept our result from x26.
|
|
|
++ "csel Xd, Xm, x26, eq"
|
|
|
++)
|
|
|
++
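(For reference, TCG's clz/ctz opcodes take a second operand that supplies the result when the input is zero, which is exactly what the tst + csel pair implements; ctz additionally bit-reverses first, since AArch64 has no direct count-trailing-zeroes. The intended semantics, sketched with GCC/clang builtins:)

    #include <assert.h>
    #include <stdint.h>

    static uint32_t clz32(uint32_t x, uint32_t zero_default)
    {
        return x ? (uint32_t)__builtin_clz(x) : zero_default;  /* clz ; tst ; csel */
    }

    static uint32_t ctz32(uint32_t x, uint32_t zero_default)
    {
        return x ? (uint32_t)__builtin_ctz(x) : zero_default;  /* rbit ; clz ; tst ; csel */
    }

    int main(void)
    {
        assert(clz32(1u, 99) == 31);
        assert(ctz32(8u, 99) == 3);
        assert(ctz32(0u, 99) == 99);
        return 0;
    }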
|
|
|
++
|
|
|
++START_COLLECTION("extension")
|
|
|
+
|
|
|
+ # Numeric extension.
|
|
|
+-math_dn("ext8s", "sxtb")
|
|
|
++math_dn("ext8s", "sxtb", source_is_wn=True)
|
|
|
+ with_dn("ext8u", "and Xd, Xn, #0xff")
|
|
|
+-math_dn("ext16s", "sxth")
|
|
|
++math_dn("ext16s", "sxth", source_is_wn=True)
|
|
|
+ with_dn("ext16u", "and Wd, Wn, #0xffff")
|
|
|
+ with_dn("ext32s_i64", "sxtw Xd, Wn")
|
|
|
+-with_dn("ext32u_i64", "and Xd, Xn, #0xffffffff")
|
|
|
++with_dn("ext32u_i64", "mov Wd, Wn")
|
|
|
++
|
|
|
++# Numeric extraction.
|
|
|
++with_dn("extrl", "mov Wd, Wn")
|
|
|
++with_dn("extrh", "lsr Xd, Xn, #32")
|
|
|
++
|
|
|
++START_COLLECTION("byteswap")
|
|
|
+
|
|
|
+ # Byte swapping.
|
|
|
+ with_dn("bswap16", "rev w27, Wn", "lsr Wd, w27, #16")
|
|
|
+ with_dn("bswap32", "rev Wd, Wn")
|
|
|
+ with_dn("bswap64", "rev Xd, Xn")
|
|
|
+
|
|
|
+-# Memory barriers.
|
|
|
+-simple("mb_all", "dmb ish")
|
|
|
+-simple("mb_st", "dmb ishst")
|
|
|
+-simple("mb_ld", "dmb ishld")
|
|
|
+
|
|
|
+ # Handlers for QEMU_LD, which handles guest <- host loads.
|
|
|
+ for subtype in ('aligned', 'unaligned', 'slowpath'):
|
|
|
+ is_aligned = (subtype == 'aligned')
|
|
|
+ is_slowpath = (subtype == 'slowpath')
|
|
|
+
|
|
|
++ START_COLLECTION(f"qemu_ld_{subtype}_unsigned_le")
|
|
|
++
|
|
|
+ ld_thunk(f"qemu_ld_ub_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu",
|
|
|
+ fastpath_32b=["ldrb Wd, [Xn, x27]"], fastpath_64b=["ldrb Wd, [Xn, x27]"],
|
|
|
+ force_slowpath=is_slowpath,
|
|
|
+ )
|
|
|
+- ld_thunk(f"qemu_ld_sb_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu_signed",
|
|
|
+- fastpath_32b=["ldrsb Wd, [Xn, x27]"], fastpath_64b=["ldrsb Xd, [Xn, x27]"],
|
|
|
+- force_slowpath=is_slowpath,
|
|
|
+- )
|
|
|
+ ld_thunk(f"qemu_ld_leuw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_lduw_mmu",
|
|
|
+ fastpath_32b=["ldrh Wd, [Xn, x27]"], fastpath_64b=["ldrh Wd, [Xn, x27]"],
|
|
|
+ force_slowpath=is_slowpath,
|
|
|
+ )
|
|
|
+- ld_thunk(f"qemu_ld_lesw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_lduw_mmu_signed",
|
|
|
+- fastpath_32b=["ldrsh Wd, [Xn, x27]"], fastpath_64b=["ldrsh Xd, [Xn, x27]"],
|
|
|
+- force_slowpath=is_slowpath,
|
|
|
+- )
|
|
|
+ ld_thunk(f"qemu_ld_leul_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldul_mmu",
|
|
|
+ fastpath_32b=["ldr Wd, [Xn, x27]"], fastpath_64b=["ldr Wd, [Xn, x27]"],
|
|
|
+ force_slowpath=is_slowpath,
|
|
|
+ )
|
|
|
++ ld_thunk(f"qemu_ld_leq_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
|
|
|
++ fastpath_32b=["ldr Xd, [Xn, x27]"], fastpath_64b=["ldr Xd, [Xn, x27]"],
|
|
|
++ force_slowpath=is_slowpath,
|
|
|
++ )
|
|
|
++
|
|
|
++ START_COLLECTION(f"qemu_ld_{subtype}_signed_le")
|
|
|
++
|
|
|
++ ld_thunk(f"qemu_ld_sb_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu_signed",
|
|
|
++ fastpath_32b=["ldrsb Wd, [Xn, x27]"], fastpath_64b=["ldrsb Xd, [Xn, x27]"],
|
|
|
++ force_slowpath=is_slowpath,
|
|
|
++ )
|
|
|
++ ld_thunk(f"qemu_ld_lesw_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_lduw_mmu_signed",
|
|
|
++ fastpath_32b=["ldrsh Wd, [Xn, x27]"], fastpath_64b=["ldrsh Xd, [Xn, x27]"],
|
|
|
++ force_slowpath=is_slowpath,
|
|
|
++ )
|
|
|
+ ld_thunk(f"qemu_ld_lesl_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldul_mmu_signed",
|
|
|
+ fastpath_32b=["ldrsw Xd, [Xn, x27]"], fastpath_64b=["ldrsw Xd, [Xn, x27]"],
|
|
|
+ force_slowpath=is_slowpath,
|
|
|
+ )
|
|
|
+- ld_thunk(f"qemu_ld_leq_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
|
|
|
++
|
|
|
++ # Special variants for the most common modes, as a speedup optimization.
|
|
|
++ ld_thunk(f"qemu_ld_ub_{subtype}_mode02", is_aligned=is_aligned, slowpath_helper="helper_ret_ldub_mmu",
|
|
|
++ fastpath_32b=["ldrb Wd, [Xn, x27]"], fastpath_64b=["ldrb Wd, [Xn, x27]"],
|
|
|
++ force_slowpath=is_slowpath, immediate=0x02
|
|
|
++ )
|
|
|
++ ld_thunk(f"qemu_ld_leq_{subtype}_mode32", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
|
|
|
+ fastpath_32b=["ldr Xd, [Xn, x27]"], fastpath_64b=["ldr Xd, [Xn, x27]"],
|
|
|
+- force_slowpath=is_slowpath,
|
|
|
++ force_slowpath=is_slowpath, immediate=0x32
|
|
|
+ )
|
|
|
+-
|
|
|
+- # Special variant for the most common mode, as a speedup optimization.
|
|
|
+ ld_thunk(f"qemu_ld_leq_{subtype}_mode3a", is_aligned=is_aligned, slowpath_helper="helper_le_ldq_mmu",
|
|
|
+ fastpath_32b=["ldr Xd, [Xn, x27]"], fastpath_64b=["ldr Xd, [Xn, x27]"],
|
|
|
+ force_slowpath=is_slowpath, immediate=0x3a
|
|
|
+ )
|
|
|
+
|
|
|
++ START_COLLECTION(f"qemu_ld_{subtype}_be")
|
|
|
++
|
|
|
+ # For now, leave the rare/big-endian stuff slow-path only.
|
|
|
+ ld_thunk(f"qemu_ld_beuw_{subtype}", None, None, "helper_be_lduw_mmu",
|
|
|
+ is_aligned=is_aligned, force_slowpath=is_slowpath)
|
|
|
+@@ -747,11 +992,15 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ is_aligned=is_aligned, force_slowpath=is_slowpath)
|
|
|
+
|
|
|
+
|
|
|
++
|
|
|
++
|
|
|
+ # Handlers for QEMU_ST, which handles guest -> host stores.
|
|
|
+ for subtype in ('aligned', 'unaligned', 'slowpath'):
|
|
|
+ is_aligned = (subtype == 'aligned')
|
|
|
+ is_slowpath = (subtype == 'slowpath')
|
|
|
+
|
|
|
++ START_COLLECTION(f"qemu_st_{subtype}_le")
|
|
|
++
|
|
|
+ st_thunk(f"qemu_st_ub_{subtype}", is_aligned=is_aligned, slowpath_helper="helper_ret_stb_mmu",
|
|
|
+ fastpath_32b=["strb Wd, [Xn, x27]"], fastpath_64b=["strb Wd, [Xn, x27]"],
|
|
|
+ force_slowpath=is_slowpath,
|
|
|
+@@ -770,11 +1019,21 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ )
|
|
|
+
|
|
|
+ # Special optimization for the most common modes.
|
|
|
++ st_thunk(f"qemu_st_ub_{subtype}_mode02", is_aligned=is_aligned, slowpath_helper="helper_ret_stb_mmu",
|
|
|
++ fastpath_32b=["strb Wd, [Xn, x27]"], fastpath_64b=["strb Wd, [Xn, x27]"],
|
|
|
++ force_slowpath=is_slowpath, immediate=0x02
|
|
|
++ )
|
|
|
++ st_thunk(f"qemu_st_leq_{subtype}_mode32", is_aligned=is_aligned, slowpath_helper="helper_le_stq_mmu",
|
|
|
++ fastpath_32b=["str Xd, [Xn, x27]"], fastpath_64b=["str Xd, [Xn, x27]"],
|
|
|
++ force_slowpath=is_slowpath, immediate=0x32
|
|
|
++ )
|
|
|
+ st_thunk(f"qemu_st_leq_{subtype}_mode3a", is_aligned=is_aligned, slowpath_helper="helper_le_stq_mmu",
|
|
|
+ fastpath_32b=["str Xd, [Xn, x27]"], fastpath_64b=["str Xd, [Xn, x27]"],
|
|
|
+ force_slowpath=is_slowpath, immediate=0x3a
|
|
|
+ )
|
|
|
+
|
|
|
++ START_COLLECTION(f"qemu_st_{subtype}_be")
|
|
|
++
|
|
|
+ # For now, leave the rare/big-endian stuff slow-path only.
|
|
|
+ st_thunk(f"qemu_st_beuw_{subtype}", None, None, "helper_be_stw_mmu",
|
|
|
+ is_aligned=is_aligned, force_slowpath=is_slowpath)
|
|
|
+@@ -784,5 +1043,121 @@ def st_thunk(name, fastpath_32b, fastpath_64b, slowpath_helper, immediate=None,
|
|
|
+ is_aligned=is_aligned, force_slowpath=is_slowpath)
|
|
|
+
|
|
|
+
|
|
|
++#
|
|
|
++# SIMD/Vector ops
|
|
|
++#
|
|
|
++
|
|
|
++# SIMD MOVI instructions.
|
|
|
++START_COLLECTION(f"simd_base")
|
|
|
++
|
|
|
++# Unoptimized/unoptimizable vector loads; each grabs an inline 64b/128b immediate.
|
|
|
++with_d("ldi_d", "ldr Dd, [x28], #8")
|
|
|
++with_d("ldi_q", "ldr Qd, [x28], #16")
|
|
|
++
|
|
|
++# General purpose reg -> vec reg loads.
|
|
|
++vector_dn("dup", "dup Vd.S, Wn")
|
|
|
++
|
|
|
++# move vector -> GP reg
|
|
|
++with_dn("umov_s0", "umov Wd, Vn.s[0]")
|
|
|
++with_dn("umov_d0", "umov Xd, Vn.d[0]")
|
|
|
++
|
|
|
++# mov GP reg -> vector
|
|
|
++with_dn("ins_s0", "ins Vd.s[0], Wn")
|
|
|
++with_dn("ins_d0", "ins Vd.d[0], Xn")
|
|
|
++
|
|
|
++
|
|
|
++# Memory -> vec reg loads.
|
|
|
++# The offset of the load is stored in a 64b immediate.
|
|
|
++
|
|
|
++# Duplicating load.
|
|
|
++# TODO: possibly squish the add into the ld1r, if that's valid?
|
|
|
++vector_dn("dupm", "ldr x27, [x28], #8", "add x27, x27, Xn", "ld1r {Vd.S}, [x27]")
|
|
|
++
|
|
|
++# Direct loads.
|
|
|
++with_dn("ldr_d", "ldr x27, [x28], #8", "ldr Dd, [Xn, x27]")
|
|
|
++with_dn("ldr_q", "ldr x27, [x28], #8", "ldr Qd, [Xn, x27]")
|
|
|
++
|
|
|
++# vec reg -> memory stores.
++# The offset of the store is read from a 64b immediate.
|
|
|
++with_dn("str_d", "ldr x27, [x28], #8", "str Dd, [Xn, x27]")
|
|
|
++with_dn("str_q", "ldr x27, [x28], #8", "str Qd, [Xn, x27]")
|
|
|
++
|
|
|
++
|
|
|
++START_COLLECTION(f"simd_arithmetic")
|
|
|
++
|
|
|
++vector_math_dnm("add", "add")
|
|
|
++vector_math_dnm("usadd", "uqadd")
|
|
|
++vector_math_dnm("ssadd", "sqadd")
|
|
|
++vector_math_dnm("sub", "sub")
|
|
|
++vector_math_dnm("ussub", "uqsub")
|
|
|
++vector_math_dnm("sssub", "sqsub")
|
|
|
++vector_math_dnm_no64("mul", "mul")
|
|
|
++vector_math_dnm_no64("smax", "smax")
|
|
|
++vector_math_dnm_no64("smin", "smin")
|
|
|
++vector_math_dnm_no64("umax", "umax")
|
|
|
++vector_math_dnm_no64("umin", "umin")
|
|
|
++
|
|
|
++START_COLLECTION(f"simd_logical")
|
|
|
++
|
|
|
++vector_logic_dnm("and", "and")
|
|
|
++vector_logic_dnm("andc", "bic")
|
|
|
++vector_logic_dnm("or", "orr")
|
|
|
++vector_logic_dnm("orc", "orn")
|
|
|
++vector_logic_dnm("xor", "eor")
|
|
|
++vector_logic_dn( "not", "not")
|
|
|
++vector_dn("neg", "neg Vd.S, Vn.S")
|
|
|
++vector_dn("abs", "abs Vd.S, Vn.S")
|
|
|
++vector_logic_dnm( "bit", "bit")
|
|
|
++vector_logic_dnm( "bif", "bif")
|
|
|
++vector_logic_dnm( "bsl", "bsl")
|
|
|
++
|
|
|
++vector_math_dnm("shlv", "ushl")
|
|
|
++vector_math_dnm("sshl", "sshl")
|
|
|
++
|
|
|
++vector_dnm("cmeq", "cmeq Vd.S, Vn.S, Vm.S", scalar="cmeq Dd, Dn, Dm")
|
|
|
++vector_dnm("cmgt", "cmgt Vd.S, Vn.S, Vm.S", scalar="cmgt Dd, Dn, Dm")
|
|
|
++vector_dnm("cmge", "cmge Vd.S, Vn.S, Vm.S", scalar="cmge Dd, Dn, Dm")
|
|
|
++vector_dnm("cmhi", "cmhi Vd.S, Vn.S, Vm.S", scalar="cmhi Dd, Dn, Dm")
|
|
|
++vector_dnm("cmhs", "cmhs Vd.S, Vn.S, Vm.S", scalar="cmhs Dd, Dn, Dm")
|
|
|
++
|
|
|
++START_COLLECTION(f"simd_immediate")
|
|
|
++
|
|
|
++# Simple imm8 movs...
|
|
|
++with_d_immediate("movi_cmode_e_op0_q0", "movi Vd.8b, #Ii", immediate_range=range(256))
|
|
|
++with_d_immediate("movi_cmode_e_op0_q1", "movi Vd.16b, #Ii", immediate_range=range(256))
|
|
|
++
|
|
|
++# ... all 00/FF movs...
|
|
|
++with_d_immediate("movi_cmode_e_op1_q0", "movi Dd, #Si", immediate_range=range(256))
|
|
|
++with_d_immediate("movi_cmode_e_op1_q1", "movi Vd.2d, #Si", immediate_range=range(256))
|
|
|
++
|
|
|
++# Halfword MOVs.
|
|
|
++with_d_immediate("movi_cmode_8_op0_q0", "movi Vd.4h, #Ii", immediate_range=range(256))
|
|
|
++with_d_immediate("movi_cmode_8_op0_q1", "movi Vd.8h, #Ii", immediate_range=range(256))
|
|
|
++with_d_immediate("mvni_cmode_8_op0_q0", "mvni Vd.4h, #Ii", immediate_range=range(256))
|
|
|
++with_d_immediate("mvni_cmode_8_op0_q1", "mvni Vd.8h, #Ii", immediate_range=range(256))
|
|
|
++with_d_immediate("movi_cmode_a_op0_q0", "movi Vd.4h, #Ii, lsl #8", immediate_range=range(256))
|
|
|
++with_d_immediate("movi_cmode_a_op0_q1", "movi Vd.8h, #Ii, lsl #8", immediate_range=range(256))
|
|
|
++with_d_immediate("mvni_cmode_a_op0_q0", "mvni Vd.4h, #Ii, lsl #8", immediate_range=range(256))
|
|
|
++with_d_immediate("mvni_cmode_a_op0_q1", "mvni Vd.8h, #Ii, lsl #8", immediate_range=range(256))
|
|
|
++
|
|
|
++# Halfword ORIs, for building complex MOVs.
|
|
|
++with_d_immediate("orr_cmode_a_op0_q0", "orr Vd.4h, #Ii, lsl #8", immediate_range=range(256))
|
|
|
++with_d_immediate("orr_cmode_a_op0_q1", "orr Vd.8h, #Ii, lsl #8", immediate_range=range(256))
|
|
|
++
|
|
|
++
|
|
|
++# Print a list of output files generated.
|
|
|
++output_c_filenames = (f"'tcti_{name}_gadgets.c'" for name in output_files.keys())
|
|
|
++output_h_filenames = (f"'tcti_{name}_gadgets.h'" for name in output_files.keys())
|
|
|
++
|
|
|
++print("Sources generated:", file=sys.stderr)
|
|
|
++print(f"gadgets = [", file=sys.stderr)
|
|
|
++print(" tcti_gadgets.h,", file=sys.stderr)
|
|
|
++
|
|
|
++for name in output_files.keys():
|
|
|
++ print(f" 'tcti_{name}_gadgets.c',", file=sys.stderr)
|
|
|
++ print(f" 'tcti_{name}_gadgets.h',", file=sys.stderr)
|
|
|
++
|
|
|
++print(f"]", file=sys.stderr)
|
|
|
++
|
|
|
+ # Statistics.
|
|
|
+-sys.stderr.write(f"\nGenerated {gadgets} gadgets with {instructions} instructions ({instructions * 4} B).\n\n")
|
|
|
++sys.stderr.write(f"\nGenerated {gadgets} gadgets with {instructions} instructions (~{(instructions * 4) // 1024 // 1024} MiB).\n\n")
|
|
|
+diff --git a/util/osdep.c b/util/osdep.c
|
|
|
+index 81c46df6f5..8df113c2df 100644
|
|
|
+--- a/util/osdep.c
|
|
|
++++ b/util/osdep.c
|
|
|
+@@ -114,6 +114,12 @@ int qemu_mprotect_none(void *addr, size_t size)
|
|
|
+ #ifdef _WIN32
|
|
|
+ return qemu_mprotect__osdep(addr, size, PAGE_NOACCESS);
|
|
|
+ #else
|
|
|
++# if defined(__APPLE__) && defined(__arm64__)
|
|
|
++ if (__builtin_available(macOS 11.2, *)) {
|
|
|
++ /* mprotect() in macOS 11.2 can't switch RWX to NONE */
|
|
|
++ return 0;
|
|
|
++ }
|
|
|
++# endif
|
|
|
+ return qemu_mprotect__osdep(addr, size, PROT_NONE);
|
|
|
+ #endif
|
|
|
+ }
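(__builtin_available() compiles to a runtime OS-version check on Apple platforms with clang, so the hunk above effectively makes qemu_mprotect_none() a silent no-op on Apple Silicon hosts. The shape of the guard, sketched outside QEMU:)

    #include <stdbool.h>

    static bool skip_mprotect_none(void)
    {
    #if defined(__APPLE__) && defined(__arm64__)
        if (__builtin_available(macOS 11.2, *)) {
            return true;   /* mprotect() here refuses RWX -> NONE transitions */
        }
    #endif
        return false;
    }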
|
|
|
+From patchwork Fri Dec 23 08:50:46 2022
|
|
|
+Content-Type: text/plain; charset="utf-8"
|
|
|
+MIME-Version: 1.0
|
|
|
+Content-Transfer-Encoding: 7bit
|
|
|
+X-Patchwork-Submitter: Alexander Graf <agraf@csgraf.de>
|
|
|
+X-Patchwork-Id: 13080757
+From: Alexander Graf <agraf@csgraf.de>
|
|
|
+To: qemu-devel@nongnu.org
|
|
|
+Cc: Peter Maydell <peter.maydell@linaro.org>, qemu-arm@nongnu.org,
|
|
|
+ Yanan Wang <wangyanan55@huawei.com>,
|
|
|
+ =?utf-8?q?Philippe_Mathieu-Daud=C3=A9?= <philmd@linaro.org>,
|
|
|
+ Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
|
|
|
+ Eduardo Habkost <eduardo@habkost.net>,
|
|
|
+ Shashi Mallela <shashi.mallela@linaro.org>,
|
|
|
+ Eric Auger <eric.auger@redhat.com>, Neil Armstrong <narmstrong@baylibre.com>
|
|
|
+Subject: [PATCH 1/2] hw/intc/arm_gicv3: Make ITT entry size configurable
|
|
|
+Date: Fri, 23 Dec 2022 09:50:46 +0100
|
|
|
+Message-Id: <20221223085047.94832-2-agraf@csgraf.de>
|
|
|
+X-Mailer: git-send-email 2.37.1 (Apple Git-137.1)
|
|
|
+In-Reply-To: <20221223085047.94832-1-agraf@csgraf.de>
|
|
|
+References: <20221223085047.94832-1-agraf@csgraf.de>
+
|
|
|
+An ITT entry is opaque to the OS. The only thing it does get told by HW is
|
|
|
+its size. In theory, that size can be any byte-aligned number; in practice,
+HW will always use powers of 2 to simplify offset calculation. We currently
|
|
|
+expose the size as 12, which is not a power of 2.
|
|
|
+
|
|
|
+To prepare for a future where we expose power-of-2 entry sizes, let's
+make the size itself configurable. We only need to ensure that an entry is
+never smaller than the fields we want to access inside it. Bigger
+is always fine.
|
|
|
+
|
|
|
+Signed-off-by: Alexander Graf <agraf@csgraf.de>
|
|
|
+---
|
|
|
+ hw/intc/arm_gicv3_its.c | 14 +++++++++++---
|
|
|
+ hw/intc/gicv3_internal.h | 2 +-
|
|
|
+ include/hw/intc/arm_gicv3_its_common.h | 1 +
|
|
|
+ 3 files changed, 13 insertions(+), 4 deletions(-)
|
|
|
+
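(For intuition, the lookup that itt_entry_size parameterizes is a plain scaled offset into the ITT; with a power-of-2 entry size the multiply degenerates to a shift, which is the simplification hardware relies on. A sketch; the helper name is illustrative:)

    #include <assert.h>
    #include <stdint.h>

    static uint64_t ite_addr(uint64_t ittaddr, uint32_t eventid, uint32_t entry_size)
    {
        /* as in update_ite()/get_ite() above */
        return ittaddr + (uint64_t)eventid * entry_size;
    }

    int main(void)
    {
        /* entry_size == 16: eventid * 16 is just eventid << 4 */
        assert(ite_addr(0x1000, 3, 16) == 0x1000 + (3u << 4));
        return 0;
    }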
|
|
|
+diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
|
|
|
+index 57c79da5c5..e7cabeb46c 100644
|
|
|
+--- a/hw/intc/arm_gicv3_its.c
|
|
|
++++ b/hw/intc/arm_gicv3_its.c
|
|
|
+@@ -215,7 +215,7 @@ static bool update_ite(GICv3ITSState *s, uint32_t eventid, const DTEntry *dte,
|
|
|
+ {
|
|
|
+ AddressSpace *as = &s->gicv3->dma_as;
|
|
|
+ MemTxResult res = MEMTX_OK;
|
|
|
+- hwaddr iteaddr = dte->ittaddr + eventid * ITS_ITT_ENTRY_SIZE;
|
|
|
++ hwaddr iteaddr = dte->ittaddr + eventid * s->itt_entry_size;
|
|
|
+ uint64_t itel = 0;
|
|
|
+ uint32_t iteh = 0;
|
|
|
+
|
|
|
+@@ -253,7 +253,7 @@ static MemTxResult get_ite(GICv3ITSState *s, uint32_t eventid,
|
|
|
+ MemTxResult res = MEMTX_OK;
|
|
|
+ uint64_t itel;
|
|
|
+ uint32_t iteh;
|
|
|
+- hwaddr iteaddr = dte->ittaddr + eventid * ITS_ITT_ENTRY_SIZE;
|
|
|
++ hwaddr iteaddr = dte->ittaddr + eventid * s->itt_entry_size;
|
|
|
+
|
|
|
+ itel = address_space_ldq_le(as, iteaddr, MEMTXATTRS_UNSPECIFIED, &res);
|
|
|
+ if (res != MEMTX_OK) {
|
|
|
+@@ -1934,6 +1934,12 @@ static void gicv3_arm_its_realize(DeviceState *dev, Error **errp)
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
++ if (s->itt_entry_size < MIN_ITS_ITT_ENTRY_SIZE) {
|
|
|
++ error_setg(errp, "ITT entry size must be at least %d",
|
|
|
++ MIN_ITS_ITT_ENTRY_SIZE);
|
|
|
++ return;
|
|
|
++ }
|
|
|
++
|
|
|
+ gicv3_add_its(s->gicv3, dev);
|
|
|
+
|
|
|
+ gicv3_its_init_mmio(s, &gicv3_its_control_ops, &gicv3_its_translation_ops);
|
|
|
+@@ -1941,7 +1947,7 @@ static void gicv3_arm_its_realize(DeviceState *dev, Error **errp)
|
|
|
+ /* set the ITS default features supported */
|
|
|
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, PHYSICAL, 1);
|
|
|
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, ITT_ENTRY_SIZE,
|
|
|
+- ITS_ITT_ENTRY_SIZE - 1);
|
|
|
++ s->itt_entry_size - 1);
|
|
|
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, IDBITS, ITS_IDBITS);
|
|
|
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, DEVBITS, ITS_DEVBITS);
|
|
|
+ s->typer = FIELD_DP64(s->typer, GITS_TYPER, CIL, 1);
|
|
|
+@@ -2008,6 +2014,8 @@ static void gicv3_its_post_load(GICv3ITSState *s)
|
|
|
+ static Property gicv3_its_props[] = {
|
|
|
+ DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3",
|
|
|
+ GICv3State *),
|
|
|
++ DEFINE_PROP_UINT8("itt-entry-size", GICv3ITSState, itt_entry_size,
|
|
|
++ MIN_ITS_ITT_ENTRY_SIZE),
|
|
|
+ DEFINE_PROP_END_OF_LIST(),
|
|
|
+ };
|
|
|
+
|
|
|
+diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
|
|
|
+index 29d5cdc1b6..2aca1ba095 100644
|
|
|
+--- a/hw/intc/gicv3_internal.h
|
|
|
++++ b/hw/intc/gicv3_internal.h
|
|
|
+@@ -450,7 +450,7 @@ FIELD(VINVALL_1, VPEID, 32, 16)
|
|
|
+ * the value of that field in memory cannot be relied upon -- older
|
|
|
+ * versions of QEMU did not correctly write to that memory.)
|
|
|
+ */
|
|
|
+-#define ITS_ITT_ENTRY_SIZE 0xC
|
|
|
++#define MIN_ITS_ITT_ENTRY_SIZE 0xC
|
|
|
+
|
|
|
+ FIELD(ITE_L, VALID, 0, 1)
|
|
|
+ FIELD(ITE_L, INTTYPE, 1, 1)
|
|
|
+diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h
|
|
|
+index a11a0f6654..e730a5482c 100644
|
|
|
+--- a/include/hw/intc/arm_gicv3_its_common.h
|
|
|
++++ b/include/hw/intc/arm_gicv3_its_common.h
|
|
|
+@@ -66,6 +66,7 @@ struct GICv3ITSState {
|
|
|
+ int dev_fd; /* kvm device fd if backed by kvm vgic support */
|
|
|
+ uint64_t gits_translater_gpa;
|
|
|
+ bool translater_gpa_known;
|
|
|
++ uint8_t itt_entry_size;
|
|
|
+
|
|
|
+ /* Registers */
|
|
|
+ uint32_t ctlr;
|
|
|
+
|
|
|
+From patchwork Fri Dec 23 08:50:47 2022
|
|
|
+Content-Type: text/plain; charset="utf-8"
|
|
|
+MIME-Version: 1.0
|
|
|
+Content-Transfer-Encoding: 7bit
|
|
|
+X-Patchwork-Submitter: Alexander Graf <agraf@csgraf.de>
|
|
|
+X-Patchwork-Id: 13080758
+From: Alexander Graf <agraf@csgraf.de>
|
|
|
+To: qemu-devel@nongnu.org
|
|
|
+Cc: Peter Maydell <peter.maydell@linaro.org>, qemu-arm@nongnu.org,
|
|
|
+ Yanan Wang <wangyanan55@huawei.com>,
|
|
|
+ =?utf-8?q?Philippe_Mathieu-Daud=C3=A9?= <philmd@linaro.org>,
|
|
|
+ Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
|
|
|
+ Eduardo Habkost <eduardo@habkost.net>,
|
|
|
+ Shashi Mallela <shashi.mallela@linaro.org>,
|
|
|
+ Eric Auger <eric.auger@redhat.com>, Neil Armstrong <narmstrong@baylibre.com>
|
|
|
+Subject: [PATCH 2/2] hw/intc/arm_gicv3: Bump ITT entry size to 16
|
|
|
+Date: Fri, 23 Dec 2022 09:50:47 +0100
|
|
|
+Message-Id: <20221223085047.94832-3-agraf@csgraf.de>
|
|
|
+X-Mailer: git-send-email 2.37.1 (Apple Git-137.1)
|
|
|
+In-Reply-To: <20221223085047.94832-1-agraf@csgraf.de>
|
|
|
+References: <20221223085047.94832-1-agraf@csgraf.de>
+
|
|
|
+Some Operating Systems (like Windows) can only deal with ITT entry sizes
+that are a power of 2. While the spec allows arbitrary ITT entry sizes,
+in practice all hardware uses powers of 2, because that simplifies offset
+calculation and ensures that a power-of-2 sized region can hold a set of
+entries without a gap at the end.
+
+So let's just bump the entry size to 16. That gives us enough space for
+the 12 bytes of data that we want in each ITT entry and makes QEMU look
+a bit more like real hardware.
+
+Signed-off-by: Alexander Graf <agraf@csgraf.de>
+---
+ hw/core/machine.c | 4 +++-
+ hw/intc/arm_gicv3_its.c | 3 +--
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/hw/core/machine.c b/hw/core/machine.c
+index 8d34caa31d..c81b3810c2 100644
+--- a/hw/core/machine.c
++++ b/hw/core/machine.c
+@@ -42,6 +42,7 @@
+
+ GlobalProperty hw_compat_7_1[] = {
+ { "virtio-device", "queue_reset", "false" },
++ { "arm-gicv3-its", "itt-entry-size", "12" },
+ };
+ const size_t hw_compat_7_1_len = G_N_ELEMENTS(hw_compat_7_1);
+
+diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c
+index e7cabeb46c..6754523321 100644
+--- a/hw/intc/arm_gicv3_its.c
++++ b/hw/intc/arm_gicv3_its.c
+@@ -2014,8 +2014,7 @@ static void gicv3_its_post_load(GICv3ITSState *s)
+ static Property gicv3_its_props[] = {
+ DEFINE_PROP_LINK("parent-gicv3", GICv3ITSState, gicv3, "arm-gicv3",
+ GICv3State *),
+- DEFINE_PROP_UINT8("itt-entry-size", GICv3ITSState, itt_entry_size,
+- MIN_ITS_ITT_ENTRY_SIZE),
++ DEFINE_PROP_UINT8("itt-entry-size", GICv3ITSState, itt_entry_size, 16),
+ DEFINE_PROP_END_OF_LIST(),
+ };
+
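+The offset argument above can be made concrete. With the default 12-byte ITT
+entries, locating an entry takes a multiply and a 4 KiB table ends with a
+4-byte gap (4096 % 12 == 4); with 16-byte entries the offset is a shift and
+the table divides evenly. Older machine types keep 12 via the hw_compat_7_1
+entry above. A minimal C sketch, with hypothetical helper names rather than
+anything from the QEMU tree:
+
+    #include <stdint.h>
+
+    /* 12-byte entries: offset needs a multiply; a 4 KiB table ends with a gap. */
+    static inline uint64_t itt_offset_12(uint64_t event_id)
+    {
+        return event_id * 12;
+    }
+
+    /* 16-byte entries: offset is a shift; 4096 / 16 == 256 entries, no gap. */
+    static inline uint64_t itt_offset_16(uint64_t event_id)
+    {
+        return event_id << 4;
+    }
+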
+From patchwork Mon Dec 19 22:08:08 2022
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+X-Patchwork-Submitter: Alexander Graf <agraf@csgraf.de>
+X-Patchwork-Id: 13077199
+From: Alexander Graf <agraf@csgraf.de>
+To: qemu-devel@nongnu.org
+Cc: Peter Maydell <peter.maydell@linaro.org>,
+ qemu-arm@nongnu.org
+Subject: [PATCH] hvf: arm: Add support for GICv3
+Date: Mon, 19 Dec 2022 23:08:08 +0100
+Message-Id: <20221219220808.26392-1-agraf@csgraf.de>
+X-Mailer: git-send-email 2.37.1 (Apple Git-137.1)
+
+We currently only support GICv2 emulation. To also support GICv3, we will
+need to pass a few system registers into their respective handler functions.
+
+This patch adds support for HVF to call into the TCG callbacks for GICv3
+system register handlers. This is safe because the GICv3 TCG code is generic
+as long as we limit ourselves to EL0 and EL1, which are the only modes
+supported by HVF.
+
+To make sure nobody trips over that, we also annotate callbacks that don't
+work in HVF mode, such as EL state change hooks.
+
+With GICv3 support in place, we can run with more than 8 vCPUs.
+
+Signed-off-by: Alexander Graf <agraf@csgraf.de>
+---
+ hw/intc/arm_gicv3_cpuif.c | 8 +-
+ target/arm/hvf/hvf.c | 151 ++++++++++++++++++++++++++++++++++++
+ target/arm/hvf/trace-events | 2 +
+ 3 files changed, 160 insertions(+), 1 deletion(-)
+
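+A note before the diffs: the reg value HVF hands to these hooks uses the
+ESR_EL2 ISS layout for trapped MSR/MRS accesses (Op0 in bits [21:20], Op2 in
+[19:17], Op1 in [16:14], CRn in [13:10], CRm in [4:1]). The hvf_reg2cp_reg()
+helper added below unpacks exactly those fields and re-encodes them for
+QEMU's cpreg hashtable lookup. A standalone sketch of the same unpacking,
+worked for ICC_PMR_EL1 (op0=3, op1=0, CRn=4, CRm=6, op2=0); the packing step
+here is illustrative, mirroring that ISS layout:
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        /* Pack ICC_PMR_EL1 the way the ESR ISS encodes a sysreg access. */
+        uint32_t reg = (3u << 20) | (0u << 17) | (0u << 14)
+                     | (4u << 10) | (6u << 1);
+
+        /* Unpack with the same shifts hvf_reg2cp_reg() uses below. */
+        printf("op0=%u op2=%u op1=%u crn=%u crm=%u\n",
+               (reg >> 20) & 0x3, (reg >> 17) & 0x7, (reg >> 14) & 0x7,
+               (reg >> 10) & 0xf, (reg >> 1) & 0xf);
+        return 0;   /* prints: op0=3 op2=0 op1=0 crn=4 crm=6 */
+    }
+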
+diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
+index b17b29288c..b4e387268c 100644
+--- a/hw/intc/arm_gicv3_cpuif.c
++++ b/hw/intc/arm_gicv3_cpuif.c
+@@ -21,6 +21,7 @@
+ #include "hw/irq.h"
+ #include "cpu.h"
+ #include "target/arm/cpregs.h"
++#include "sysemu/tcg.h"
+
+ /*
+ * Special case return value from hppvi_index(); must be larger than
+@@ -2810,6 +2811,8 @@ void gicv3_init_cpuif(GICv3State *s)
+ * which case we'd get the wrong value.
+ * So instead we define the regs with no ri->opaque info, and
+ * get back to the GICv3CPUState from the CPUARMState.
++ *
++ * These CP regs callbacks can be called from either TCG or HVF code.
+ */
+ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
+
+@@ -2905,6 +2908,9 @@ void gicv3_init_cpuif(GICv3State *s)
+ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo);
+ }
+ }
+- arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs);
++ if (tcg_enabled()) {
++ /* We can only trap EL changes with TCG for now */
++ arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs);
++ }
+ }
+ }
+diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
+index 060aa0ccf4..8ea4be5f30 100644
+--- a/target/arm/hvf/hvf.c
++++ b/target/arm/hvf/hvf.c
+@@ -80,6 +80,33 @@
+ #define SYSREG_PMCCNTR_EL0 SYSREG(3, 3, 9, 13, 0)
+ #define SYSREG_PMCCFILTR_EL0 SYSREG(3, 3, 14, 15, 7)
+
++#define SYSREG_ICC_AP0R0_EL1 SYSREG(3, 0, 12, 8, 4)
++#define SYSREG_ICC_AP0R1_EL1 SYSREG(3, 0, 12, 8, 5)
++#define SYSREG_ICC_AP0R2_EL1 SYSREG(3, 0, 12, 8, 6)
++#define SYSREG_ICC_AP0R3_EL1 SYSREG(3, 0, 12, 8, 7)
++#define SYSREG_ICC_AP1R0_EL1 SYSREG(3, 0, 12, 9, 0)
++#define SYSREG_ICC_AP1R1_EL1 SYSREG(3, 0, 12, 9, 1)
++#define SYSREG_ICC_AP1R2_EL1 SYSREG(3, 0, 12, 9, 2)
++#define SYSREG_ICC_AP1R3_EL1 SYSREG(3, 0, 12, 9, 3)
++#define SYSREG_ICC_ASGI1R_EL1 SYSREG(3, 0, 12, 11, 6)
++#define SYSREG_ICC_BPR0_EL1 SYSREG(3, 0, 12, 8, 3)
++#define SYSREG_ICC_BPR1_EL1 SYSREG(3, 0, 12, 12, 3)
++#define SYSREG_ICC_CTLR_EL1 SYSREG(3, 0, 12, 12, 4)
++#define SYSREG_ICC_DIR_EL1 SYSREG(3, 0, 12, 11, 1)
++#define SYSREG_ICC_EOIR0_EL1 SYSREG(3, 0, 12, 8, 1)
++#define SYSREG_ICC_EOIR1_EL1 SYSREG(3, 0, 12, 12, 1)
++#define SYSREG_ICC_HPPIR0_EL1 SYSREG(3, 0, 12, 8, 2)
++#define SYSREG_ICC_HPPIR1_EL1 SYSREG(3, 0, 12, 12, 2)
++#define SYSREG_ICC_IAR0_EL1 SYSREG(3, 0, 12, 8, 0)
++#define SYSREG_ICC_IAR1_EL1 SYSREG(3, 0, 12, 12, 0)
++#define SYSREG_ICC_IGRPEN0_EL1 SYSREG(3, 0, 12, 12, 6)
++#define SYSREG_ICC_IGRPEN1_EL1 SYSREG(3, 0, 12, 12, 7)
++#define SYSREG_ICC_PMR_EL1 SYSREG(3, 0, 4, 6, 0)
++#define SYSREG_ICC_RPR_EL1 SYSREG(3, 0, 12, 11, 3)
++#define SYSREG_ICC_SGI0R_EL1 SYSREG(3, 0, 12, 11, 7)
++#define SYSREG_ICC_SGI1R_EL1 SYSREG(3, 0, 12, 11, 5)
++#define SYSREG_ICC_SRE_EL1 SYSREG(3, 0, 12, 12, 5)
++
+ #define WFX_IS_WFE (1 << 0)
+
+ #define TMR_CTL_ENABLE (1 << 0)
+@@ -788,6 +815,43 @@ static bool is_id_sysreg(uint32_t reg)
+ SYSREG_CRM(reg) < 8;
+ }
+
++static uint32_t hvf_reg2cp_reg(uint32_t reg)
++{
++ return ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
++ (reg >> 10) & 0xf,
++ (reg >> 1) & 0xf,
++ (reg >> 20) & 0x3,
++ (reg >> 14) & 0x7,
++ (reg >> 17) & 0x7);
++}
++
++static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val)
++{
++ ARMCPU *arm_cpu = ARM_CPU(cpu);
++ CPUARMState *env = &arm_cpu->env;
++ const ARMCPRegInfo *ri;
++
++ ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
++ if (ri) {
++ if (ri->accessfn) {
++ if (ri->accessfn(env, ri, true) != CP_ACCESS_OK) {
++ return false;
++ }
++ }
++ if (ri->type & ARM_CP_CONST) {
++ *val = ri->resetvalue;
++ } else if (ri->readfn) {
++ *val = ri->readfn(env, ri);
++ } else {
++ *val = CPREG_FIELD64(env, ri);
++ }
++ trace_hvf_vgic_read(ri->name, *val);
++ return true;
++ }
++
++ return false;
++}
++
+ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
+ {
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+@@ -839,6 +903,36 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
+ case SYSREG_OSDLR_EL1:
+ /* Dummy register */
+ break;
++ case SYSREG_ICC_AP0R0_EL1:
++ case SYSREG_ICC_AP0R1_EL1:
++ case SYSREG_ICC_AP0R2_EL1:
++ case SYSREG_ICC_AP0R3_EL1:
++ case SYSREG_ICC_AP1R0_EL1:
++ case SYSREG_ICC_AP1R1_EL1:
++ case SYSREG_ICC_AP1R2_EL1:
++ case SYSREG_ICC_AP1R3_EL1:
++ case SYSREG_ICC_ASGI1R_EL1:
++ case SYSREG_ICC_BPR0_EL1:
++ case SYSREG_ICC_BPR1_EL1:
++ case SYSREG_ICC_DIR_EL1:
++ case SYSREG_ICC_EOIR0_EL1:
++ case SYSREG_ICC_EOIR1_EL1:
++ case SYSREG_ICC_HPPIR0_EL1:
++ case SYSREG_ICC_HPPIR1_EL1:
++ case SYSREG_ICC_IAR0_EL1:
++ case SYSREG_ICC_IAR1_EL1:
++ case SYSREG_ICC_IGRPEN0_EL1:
++ case SYSREG_ICC_IGRPEN1_EL1:
++ case SYSREG_ICC_PMR_EL1:
++ case SYSREG_ICC_SGI0R_EL1:
++ case SYSREG_ICC_SGI1R_EL1:
++ case SYSREG_ICC_SRE_EL1:
++ case SYSREG_ICC_CTLR_EL1:
++ /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */
++ if (!hvf_sysreg_read_cp(cpu, reg, &val)) {
++ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
++ }
++ break;
+ default:
+ if (is_id_sysreg(reg)) {
+ /* ID system registers read as RES0 */
+@@ -944,6 +1038,33 @@ static void pmswinc_write(CPUARMState *env, uint64_t value)
+ }
+ }
+
++static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val)
++{
++ ARMCPU *arm_cpu = ARM_CPU(cpu);
++ CPUARMState *env = &arm_cpu->env;
++ const ARMCPRegInfo *ri;
++
++ ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
++
++ if (ri) {
++ if (ri->accessfn) {
++ if (ri->accessfn(env, ri, false) != CP_ACCESS_OK) {
++ return false;
++ }
++ }
++ if (ri->writefn) {
++ ri->writefn(env, ri, val);
++ } else {
++ CPREG_FIELD64(env, ri) = val;
++ }
++
++ trace_hvf_vgic_write(ri->name, val);
++ return true;
++ }
++
++ return false;
++}
++
+ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
+ {
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+@@ -1021,6 +1142,36 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
+ case SYSREG_OSDLR_EL1:
+ /* Dummy register */
+ break;
++ case SYSREG_ICC_AP0R0_EL1:
++ case SYSREG_ICC_AP0R1_EL1:
++ case SYSREG_ICC_AP0R2_EL1:
++ case SYSREG_ICC_AP0R3_EL1:
++ case SYSREG_ICC_AP1R0_EL1:
++ case SYSREG_ICC_AP1R1_EL1:
++ case SYSREG_ICC_AP1R2_EL1:
++ case SYSREG_ICC_AP1R3_EL1:
++ case SYSREG_ICC_ASGI1R_EL1:
++ case SYSREG_ICC_BPR0_EL1:
++ case SYSREG_ICC_BPR1_EL1:
++ case SYSREG_ICC_CTLR_EL1:
++ case SYSREG_ICC_DIR_EL1:
++ case SYSREG_ICC_EOIR0_EL1:
++ case SYSREG_ICC_EOIR1_EL1:
++ case SYSREG_ICC_HPPIR0_EL1:
++ case SYSREG_ICC_HPPIR1_EL1:
++ case SYSREG_ICC_IAR0_EL1:
++ case SYSREG_ICC_IAR1_EL1:
++ case SYSREG_ICC_IGRPEN0_EL1:
++ case SYSREG_ICC_IGRPEN1_EL1:
++ case SYSREG_ICC_PMR_EL1:
++ case SYSREG_ICC_SGI0R_EL1:
++ case SYSREG_ICC_SGI1R_EL1:
++ case SYSREG_ICC_SRE_EL1:
++ /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */
++ if (!hvf_sysreg_write_cp(cpu, reg, val)) {
++ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
++ }
++ break;
+ default:
+ cpu_synchronize_state(cpu);
+ trace_hvf_unhandled_sysreg_write(env->pc, reg,
+diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events
+index 820e8e0297..4fbbe4b45e 100644
+--- a/target/arm/hvf/trace-events
++++ b/target/arm/hvf/trace-events
+@@ -9,3 +9,5 @@ hvf_unknown_hvc(uint64_t x0) "unknown HVC! 0x%016"PRIx64
+ hvf_unknown_smc(uint64_t x0) "unknown SMC! 0x%016"PRIx64
+ hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]"
+ hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpu=0x%x"
++hvf_vgic_write(const char *name, uint64_t val) "vgic write to %s [val=0x%016"PRIx64"]"
++hvf_vgic_read(const char *name, uint64_t val) "vgic read from %s [val=0x%016"PRIx64"]"
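+
+As a usage note, treat the exact option spellings as a sketch for this QEMU
+version: with the patch applied, a macOS host can start a guest with more
+than 8 vCPUs by selecting GICv3 explicitly, and the two new trace points can
+be enabled to watch the vgic sysreg traffic:
+
+    qemu-system-aarch64 -M virt,gic-version=3 -accel hvf -smp 10 \
+        -trace 'hvf_vgic_*' ...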