Browse Source

Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20190205' into staging

target-arm queue:
 * Implement Armv8.5-BTI extension for system emulation mode
 * Implement the PR_PAC_RESET_KEYS prctl() for linux-user mode's Armv8.3-PAuth support
 * Support TBI (top-byte-ignore) properly for linux-user mode
 * gdbstub: allow killing QEMU via vKill command
 * hw/arm/boot: Support DTB autoload for firmware-only boots
 * target/arm: Make FPSCR/FPCR trapped-exception bits RAZ/WI

# gpg: Signature made Tue 05 Feb 2019 17:04:22 GMT
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20190205: (22 commits)
  target/arm: Make FPSCR/FPCR trapped-exception bits RAZ/WI
  hw/arm/boot: Support DTB autoload for firmware-only boots
  hw/arm/boot: Clarify why arm_setup_firmware_boot() doesn't set env->boot_info
  hw/arm/boot: Factor out "set up firmware boot" code
  hw/arm/boot: Factor out "direct kernel boot" code into its own function
  hw/arm/boot: Fix block comment style in arm_load_kernel()
  gdbstub: allow killing QEMU via vKill command
  target/arm: Enable TBI for user-only
  target/arm: Compute TB_FLAGS for TBI for user-only
  target/arm: Clean TBI for data operations in the translator
  target/arm: Add TBFLAG_A64_TBID, split out gen_top_byte_ignore
  tests/tcg/aarch64: Add pauth smoke test
  linux-user: Implement PR_PAC_RESET_KEYS
  target/arm: Enable BTI for -cpu max
  target/arm: Set btype for indirect branches
  target/arm: Reset btype for direct branches
  target/arm: Default handling of BTYPE during translation
  target/arm: Cache the GP bit for a page in MemTxAttrs
  exec: Add target-specific tlb bits to MemTxAttrs
  target/arm: Add BT and BTYPE to tb->flags
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 6 years ago
parent
commit
47994e16b1

+ 4 - 0
gdbstub.c

@@ -1359,6 +1359,10 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
 
 
             put_packet(s, buf);
             put_packet(s, buf);
             break;
             break;
+        } else if (strncmp(p, "Kill;", 5) == 0) {
+            /* Kill the target */
+            error_report("QEMU: Terminated via GDBstub");
+            exit(0);
         } else {
         } else {
             goto unknown_command;
             goto unknown_command;
         }
         }

+ 96 - 70
hw/arm/boot.c

@@ -949,9 +949,12 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
     return size;
     return size;
 }
 }
 
 
-void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
+static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
+                                         struct arm_boot_info *info)
 {
 {
+    /* Set up for a direct boot of a kernel image file. */
     CPUState *cs;
     CPUState *cs;
+    AddressSpace *as = arm_boot_address_space(cpu, info);
     int kernel_size;
     int kernel_size;
     int initrd_size;
     int initrd_size;
     int is_linux = 0;
     int is_linux = 0;
@@ -959,70 +962,6 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
     int elf_machine;
     int elf_machine;
     hwaddr entry;
     hwaddr entry;
     static const ARMInsnFixup *primary_loader;
     static const ARMInsnFixup *primary_loader;
-    AddressSpace *as = arm_boot_address_space(cpu, info);
-
-    /* CPU objects (unlike devices) are not automatically reset on system
-     * reset, so we must always register a handler to do so. If we're
-     * actually loading a kernel, the handler is also responsible for
-     * arranging that we start it correctly.
-     */
-    for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
-        qemu_register_reset(do_cpu_reset, ARM_CPU(cs));
-    }
-
-    /* The board code is not supposed to set secure_board_setup unless
-     * running its code in secure mode is actually possible, and KVM
-     * doesn't support secure.
-     */
-    assert(!(info->secure_board_setup && kvm_enabled()));
-
-    info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb");
-    info->dtb_limit = 0;
-
-    /* Load the kernel.  */
-    if (!info->kernel_filename || info->firmware_loaded) {
-
-        if (have_dtb(info)) {
-            /* If we have a device tree blob, but no kernel to supply it to (or
-             * the kernel is supposed to be loaded by the bootloader), copy the
-             * DTB to the base of RAM for the bootloader to pick up.
-             */
-            info->dtb_start = info->loader_start;
-        }
-
-        if (info->kernel_filename) {
-            FWCfgState *fw_cfg;
-            bool try_decompressing_kernel;
-
-            fw_cfg = fw_cfg_find();
-            try_decompressing_kernel = arm_feature(&cpu->env,
-                                                   ARM_FEATURE_AARCH64);
-
-            /* Expose the kernel, the command line, and the initrd in fw_cfg.
-             * We don't process them here at all, it's all left to the
-             * firmware.
-             */
-            load_image_to_fw_cfg(fw_cfg,
-                                 FW_CFG_KERNEL_SIZE, FW_CFG_KERNEL_DATA,
-                                 info->kernel_filename,
-                                 try_decompressing_kernel);
-            load_image_to_fw_cfg(fw_cfg,
-                                 FW_CFG_INITRD_SIZE, FW_CFG_INITRD_DATA,
-                                 info->initrd_filename, false);
-
-            if (info->kernel_cmdline) {
-                fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
-                               strlen(info->kernel_cmdline) + 1);
-                fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA,
-                                  info->kernel_cmdline);
-            }
-        }
-
-        /* We will start from address 0 (typically a boot ROM image) in the
-         * same way as hardware.
-         */
-        return;
-    }
 
 
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
         primary_loader = bootloader_aarch64;
         primary_loader = bootloader_aarch64;
@@ -1045,7 +984,8 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
     if (info->nb_cpus == 0)
     if (info->nb_cpus == 0)
         info->nb_cpus = 1;
         info->nb_cpus = 1;
 
 
-    /* We want to put the initrd far enough into RAM that when the
+    /*
+     * We want to put the initrd far enough into RAM that when the
      * kernel is uncompressed it will not clobber the initrd. However
      * kernel is uncompressed it will not clobber the initrd. However
      * on boards without much RAM we must ensure that we still leave
      * on boards without much RAM we must ensure that we still leave
      * enough room for a decent sized initrd, and on boards with large
      * enough room for a decent sized initrd, and on boards with large
@@ -1062,12 +1002,14 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
     kernel_size = arm_load_elf(info, &elf_entry, &elf_low_addr,
     kernel_size = arm_load_elf(info, &elf_entry, &elf_low_addr,
                                &elf_high_addr, elf_machine, as);
                                &elf_high_addr, elf_machine, as);
     if (kernel_size > 0 && have_dtb(info)) {
     if (kernel_size > 0 && have_dtb(info)) {
-        /* If there is still some room left at the base of RAM, try and put
+        /*
+         * If there is still some room left at the base of RAM, try and put
          * the DTB there like we do for images loaded with -bios or -pflash.
          * the DTB there like we do for images loaded with -bios or -pflash.
          */
          */
         if (elf_low_addr > info->loader_start
         if (elf_low_addr > info->loader_start
             || elf_high_addr < info->loader_start) {
             || elf_high_addr < info->loader_start) {
-            /* Set elf_low_addr as address limit for arm_load_dtb if it may be
+            /*
+             * Set elf_low_addr as address limit for arm_load_dtb if it may be
              * pointing into RAM, otherwise pass '0' (no limit)
              * pointing into RAM, otherwise pass '0' (no limit)
              */
              */
             if (elf_low_addr < info->loader_start) {
             if (elf_low_addr < info->loader_start) {
@@ -1128,7 +1070,8 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
         fixupcontext[FIXUP_BOARDID] = info->board_id;
         fixupcontext[FIXUP_BOARDID] = info->board_id;
         fixupcontext[FIXUP_BOARD_SETUP] = info->board_setup_addr;
         fixupcontext[FIXUP_BOARD_SETUP] = info->board_setup_addr;
 
 
-        /* for device tree boot, we pass the DTB directly in r2. Otherwise
+        /*
+         * for device tree boot, we pass the DTB directly in r2. Otherwise
          * we point to the kernel args.
          * we point to the kernel args.
          */
          */
         if (have_dtb(info)) {
         if (have_dtb(info)) {
@@ -1181,7 +1124,8 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
             info->write_board_setup(cpu, info);
             info->write_board_setup(cpu, info);
         }
         }
 
 
-        /* Notify devices which need to fake up firmware initialization
+        /*
+         * Notify devices which need to fake up firmware initialization
          * that we're doing a direct kernel boot.
          * that we're doing a direct kernel boot.
          */
          */
         object_child_foreach_recursive(object_get_root(),
         object_child_foreach_recursive(object_get_root(),
@@ -1192,6 +1136,88 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
     for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
     for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
         ARM_CPU(cs)->env.boot_info = info;
         ARM_CPU(cs)->env.boot_info = info;
     }
     }
+}
+
+static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info)
+{
+    /* Set up for booting firmware (which might load a kernel via fw_cfg) */
+
+    if (have_dtb(info)) {
+        /*
+         * If we have a device tree blob, but no kernel to supply it to (or
+         * the kernel is supposed to be loaded by the bootloader), copy the
+         * DTB to the base of RAM for the bootloader to pick up.
+         */
+        info->dtb_start = info->loader_start;
+    }
+
+    if (info->kernel_filename) {
+        FWCfgState *fw_cfg;
+        bool try_decompressing_kernel;
+
+        fw_cfg = fw_cfg_find();
+        try_decompressing_kernel = arm_feature(&cpu->env,
+                                               ARM_FEATURE_AARCH64);
+
+        /*
+         * Expose the kernel, the command line, and the initrd in fw_cfg.
+         * We don't process them here at all, it's all left to the
+         * firmware.
+         */
+        load_image_to_fw_cfg(fw_cfg,
+                             FW_CFG_KERNEL_SIZE, FW_CFG_KERNEL_DATA,
+                             info->kernel_filename,
+                             try_decompressing_kernel);
+        load_image_to_fw_cfg(fw_cfg,
+                             FW_CFG_INITRD_SIZE, FW_CFG_INITRD_DATA,
+                             info->initrd_filename, false);
+
+        if (info->kernel_cmdline) {
+            fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
+                           strlen(info->kernel_cmdline) + 1);
+            fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA,
+                              info->kernel_cmdline);
+        }
+    }
+
+    /*
+     * We will start from address 0 (typically a boot ROM image) in the
+     * same way as hardware. Leave env->boot_info NULL, so that
+     * do_cpu_reset() knows it does not need to alter the PC on reset.
+     */
+}
+
+void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
+{
+    CPUState *cs;
+    AddressSpace *as = arm_boot_address_space(cpu, info);
+
+    /*
+     * CPU objects (unlike devices) are not automatically reset on system
+     * reset, so we must always register a handler to do so. If we're
+     * actually loading a kernel, the handler is also responsible for
+     * arranging that we start it correctly.
+     */
+    for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
+        qemu_register_reset(do_cpu_reset, ARM_CPU(cs));
+    }
+
+    /*
+     * The board code is not supposed to set secure_board_setup unless
+     * running its code in secure mode is actually possible, and KVM
+     * doesn't support secure.
+     */
+    assert(!(info->secure_board_setup && kvm_enabled()));
+
+    info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb");
+    info->dtb_limit = 0;
+
+    /* Load the kernel.  */
+    if (!info->kernel_filename || info->firmware_loaded) {
+        arm_setup_firmware_boot(cpu, info);
+    } else {
+        arm_setup_direct_kernel_boot(cpu, info);
+    }
 
 
     if (!info->skip_dtb_autoload && have_dtb(info)) {
     if (!info->skip_dtb_autoload && have_dtb(info)) {
         if (arm_load_dtb(info->dtb_start, info, info->dtb_limit, as) < 0) {
         if (arm_load_dtb(info->dtb_start, info, info->dtb_limit, as) < 0) {

+ 10 - 0
include/exec/memattrs.h

@@ -37,6 +37,16 @@ typedef struct MemTxAttrs {
     unsigned int user:1;
     unsigned int user:1;
     /* Requester ID (for MSI for example) */
     /* Requester ID (for MSI for example) */
     unsigned int requester_id:16;
     unsigned int requester_id:16;
+    /*
+     * The following are target-specific page-table bits.  These are not
+     * related to actual memory transactions at all.  However, this structure
+     * is part of the tlb_fill interface, cached in the cputlb structure,
+     * and has unused bits.  These fields will be read by target-specific
+     * helpers using env->iotlb[mmu_idx][tlb_index()].attrs.target_tlb_bitN.
+     */
+    unsigned int target_tlb_bit0 : 1;
+    unsigned int target_tlb_bit1 : 1;
+    unsigned int target_tlb_bit2 : 1;
 } MemTxAttrs;
 } MemTxAttrs;
 
 
 /* Bus masters which don't specify any attributes will get this,
 /* Bus masters which don't specify any attributes will get this,

+ 7 - 0
linux-user/aarch64/target_syscall.h

@@ -22,6 +22,13 @@ struct target_pt_regs {
 #define TARGET_PR_SVE_SET_VL  50
 #define TARGET_PR_SVE_SET_VL  50
 #define TARGET_PR_SVE_GET_VL  51
 #define TARGET_PR_SVE_GET_VL  51
 
 
+#define TARGET_PR_PAC_RESET_KEYS 54
+# define TARGET_PR_PAC_APIAKEY   (1 << 0)
+# define TARGET_PR_PAC_APIBKEY   (1 << 1)
+# define TARGET_PR_PAC_APDAKEY   (1 << 2)
+# define TARGET_PR_PAC_APDBKEY   (1 << 3)
+# define TARGET_PR_PAC_APGAKEY   (1 << 4)
+
 void arm_init_pauth_key(ARMPACKey *key);
 void arm_init_pauth_key(ARMPACKey *key);
 
 
 #endif /* AARCH64_TARGET_SYSCALL_H */
 #endif /* AARCH64_TARGET_SYSCALL_H */

+ 36 - 0
linux-user/syscall.c

@@ -9691,6 +9691,42 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
                 }
                 }
             }
             }
             return ret;
             return ret;
+        case TARGET_PR_PAC_RESET_KEYS:
+            {
+                CPUARMState *env = cpu_env;
+                ARMCPU *cpu = arm_env_get_cpu(env);
+
+                if (arg3 || arg4 || arg5) {
+                    return -TARGET_EINVAL;
+                }
+                if (cpu_isar_feature(aa64_pauth, cpu)) {
+                    int all = (TARGET_PR_PAC_APIAKEY | TARGET_PR_PAC_APIBKEY |
+                               TARGET_PR_PAC_APDAKEY | TARGET_PR_PAC_APDBKEY |
+                               TARGET_PR_PAC_APGAKEY);
+                    if (arg2 == 0) {
+                        arg2 = all;
+                    } else if (arg2 & ~all) {
+                        return -TARGET_EINVAL;
+                    }
+                    if (arg2 & TARGET_PR_PAC_APIAKEY) {
+                        arm_init_pauth_key(&env->apia_key);
+                    }
+                    if (arg2 & TARGET_PR_PAC_APIBKEY) {
+                        arm_init_pauth_key(&env->apib_key);
+                    }
+                    if (arg2 & TARGET_PR_PAC_APDAKEY) {
+                        arm_init_pauth_key(&env->apda_key);
+                    }
+                    if (arg2 & TARGET_PR_PAC_APDBKEY) {
+                        arm_init_pauth_key(&env->apdb_key);
+                    }
+                    if (arg2 & TARGET_PR_PAC_APGAKEY) {
+                        arm_init_pauth_key(&env->apga_key);
+                    }
+                    return 0;
+                }
+            }
+            return -TARGET_EINVAL;
 #endif /* AARCH64 */
 #endif /* AARCH64 */
         case PR_GET_SECCOMP:
         case PR_GET_SECCOMP:
         case PR_SET_SECCOMP:
         case PR_SET_SECCOMP:

+ 6 - 0
target/arm/cpu.c

@@ -200,6 +200,12 @@ static void arm_cpu_reset(CPUState *s)
         env->vfp.zcr_el[1] = cpu->sve_max_vq - 1;
         env->vfp.zcr_el[1] = cpu->sve_max_vq - 1;
         env->vfp.zcr_el[2] = env->vfp.zcr_el[1];
         env->vfp.zcr_el[2] = env->vfp.zcr_el[1];
         env->vfp.zcr_el[3] = env->vfp.zcr_el[1];
         env->vfp.zcr_el[3] = env->vfp.zcr_el[1];
+        /*
+         * Enable TBI0 and TBI1.  While the real kernel only enables TBI0,
+         * turning on both here will produce smaller code and otherwise
+         * make no difference to the user-level emulation.
+         */
+        env->cp15.tcr_el[1].raw_tcr = (3ULL << 37);
 #else
 #else
         /* Reset into the highest available EL */
         /* Reset into the highest available EL */
         if (arm_feature(env, ARM_FEATURE_EL3)) {
         if (arm_feature(env, ARM_FEATURE_EL3)) {

+ 25 - 2
target/arm/cpu.h

@@ -234,6 +234,7 @@ typedef struct CPUARMState {
      *    semantics as for AArch32, as described in the comments on each field)
      *    semantics as for AArch32, as described in the comments on each field)
      *  nRW (also known as M[4]) is kept, inverted, in env->aarch64
      *  nRW (also known as M[4]) is kept, inverted, in env->aarch64
      *  DAIF (exception masks) are kept in env->daif
      *  DAIF (exception masks) are kept in env->daif
+     *  BTYPE is kept in env->btype
      *  all other bits are stored in their correct places in env->pstate
      *  all other bits are stored in their correct places in env->pstate
      */
      */
     uint32_t pstate;
     uint32_t pstate;
@@ -263,6 +264,7 @@ typedef struct CPUARMState {
     uint32_t GE; /* cpsr[19:16] */
     uint32_t GE; /* cpsr[19:16] */
     uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
     uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
     uint32_t condexec_bits; /* IT bits.  cpsr[15:10,26:25].  */
     uint32_t condexec_bits; /* IT bits.  cpsr[15:10,26:25].  */
+    uint32_t btype;  /* BTI branch type.  spsr[11:10].  */
     uint64_t daif; /* exception masks, in the bits they are in PSTATE */
     uint64_t daif; /* exception masks, in the bits they are in PSTATE */
 
 
     uint64_t elr_el[4]; /* AArch64 exception link regs  */
     uint64_t elr_el[4]; /* AArch64 exception link regs  */
@@ -1206,6 +1208,7 @@ void pmu_init(ARMCPU *cpu);
 #define PSTATE_I (1U << 7)
 #define PSTATE_I (1U << 7)
 #define PSTATE_A (1U << 8)
 #define PSTATE_A (1U << 8)
 #define PSTATE_D (1U << 9)
 #define PSTATE_D (1U << 9)
+#define PSTATE_BTYPE (3U << 10)
 #define PSTATE_IL (1U << 20)
 #define PSTATE_IL (1U << 20)
 #define PSTATE_SS (1U << 21)
 #define PSTATE_SS (1U << 21)
 #define PSTATE_V (1U << 28)
 #define PSTATE_V (1U << 28)
@@ -1214,7 +1217,7 @@ void pmu_init(ARMCPU *cpu);
 #define PSTATE_N (1U << 31)
 #define PSTATE_N (1U << 31)
 #define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V)
 #define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V)
 #define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F)
 #define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F)
-#define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF)
+#define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF | PSTATE_BTYPE)
 /* Mode values for AArch64 */
 /* Mode values for AArch64 */
 #define PSTATE_MODE_EL3h 13
 #define PSTATE_MODE_EL3h 13
 #define PSTATE_MODE_EL3t 12
 #define PSTATE_MODE_EL3t 12
@@ -1246,7 +1249,7 @@ static inline uint32_t pstate_read(CPUARMState *env)
     ZF = (env->ZF == 0);
     ZF = (env->ZF == 0);
     return (env->NF & 0x80000000) | (ZF << 30)
     return (env->NF & 0x80000000) | (ZF << 30)
         | (env->CF << 29) | ((env->VF & 0x80000000) >> 3)
         | (env->CF << 29) | ((env->VF & 0x80000000) >> 3)
-        | env->pstate | env->daif;
+        | env->pstate | env->daif | (env->btype << 10);
 }
 }
 
 
 static inline void pstate_write(CPUARMState *env, uint32_t val)
 static inline void pstate_write(CPUARMState *env, uint32_t val)
@@ -1256,6 +1259,7 @@ static inline void pstate_write(CPUARMState *env, uint32_t val)
     env->CF = (val >> 29) & 1;
     env->CF = (val >> 29) & 1;
     env->VF = (val << 3) & 0x80000000;
     env->VF = (val << 3) & 0x80000000;
     env->daif = val & PSTATE_DAIF;
     env->daif = val & PSTATE_DAIF;
+    env->btype = (val >> 10) & 3;
     env->pstate = val & ~CACHED_PSTATE_BITS;
     env->pstate = val & ~CACHED_PSTATE_BITS;
 }
 }
 
 
@@ -1414,6 +1418,12 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
 #define FPSR_MASK 0xf800009f
 #define FPSR_MASK 0xf800009f
 #define FPCR_MASK 0x07ff9f00
 #define FPCR_MASK 0x07ff9f00
 
 
+#define FPCR_IOE    (1 << 8)    /* Invalid Operation exception trap enable */
+#define FPCR_DZE    (1 << 9)    /* Divide by Zero exception trap enable */
+#define FPCR_OFE    (1 << 10)   /* Overflow exception trap enable */
+#define FPCR_UFE    (1 << 11)   /* Underflow exception trap enable */
+#define FPCR_IXE    (1 << 12)   /* Inexact exception trap enable */
+#define FPCR_IDE    (1 << 15)   /* Input Denormal exception trap enable */
 #define FPCR_FZ16   (1 << 19)   /* ARMv8.2+, FP16 flush-to-zero */
 #define FPCR_FZ16   (1 << 19)   /* ARMv8.2+, FP16 flush-to-zero */
 #define FPCR_FZ     (1 << 24)   /* Flush-to-zero enable bit */
 #define FPCR_FZ     (1 << 24)   /* Flush-to-zero enable bit */
 #define FPCR_DN     (1 << 25)   /* Default NaN enable bit */
 #define FPCR_DN     (1 << 25)   /* Default NaN enable bit */
@@ -1681,6 +1691,11 @@ FIELD(ID_AA64PFR0, GIC, 24, 4)
 FIELD(ID_AA64PFR0, RAS, 28, 4)
 FIELD(ID_AA64PFR0, RAS, 28, 4)
 FIELD(ID_AA64PFR0, SVE, 32, 4)
 FIELD(ID_AA64PFR0, SVE, 32, 4)
 
 
+FIELD(ID_AA64PFR1, BT, 0, 4)
+FIELD(ID_AA64PFR1, SBSS, 4, 4)
+FIELD(ID_AA64PFR1, MTE, 8, 4)
+FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4)
+
 FIELD(ID_AA64MMFR0, PARANGE, 0, 4)
 FIELD(ID_AA64MMFR0, PARANGE, 0, 4)
 FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4)
 FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4)
 FIELD(ID_AA64MMFR0, BIGEND, 8, 4)
 FIELD(ID_AA64MMFR0, BIGEND, 8, 4)
@@ -3043,6 +3058,9 @@ FIELD(TBFLAG_A64, TBII, 0, 2)
 FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2)
 FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2)
 FIELD(TBFLAG_A64, ZCR_LEN, 4, 4)
 FIELD(TBFLAG_A64, ZCR_LEN, 4, 4)
 FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1)
 FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1)
+FIELD(TBFLAG_A64, BT, 9, 1)
+FIELD(TBFLAG_A64, BTYPE, 10, 2)
+FIELD(TBFLAG_A64, TBID, 12, 2)
 
 
 static inline bool bswap_code(bool sctlr_b)
 static inline bool bswap_code(bool sctlr_b)
 {
 {
@@ -3328,6 +3346,11 @@ static inline bool isar_feature_aa64_lor(const ARMISARegisters *id)
     return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0;
     return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0;
 }
 }
 
 
+static inline bool isar_feature_aa64_bti(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0;
+}
+
 /*
 /*
  * Forward to the above feature tests given an ARMCPU pointer.
  * Forward to the above feature tests given an ARMCPU pointer.
  */
  */

+ 4 - 0
target/arm/cpu64.c

@@ -324,6 +324,10 @@ static void aarch64_max_initfn(Object *obj)
         t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1);
         t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1);
         cpu->isar.id_aa64pfr0 = t;
         cpu->isar.id_aa64pfr0 = t;
 
 
+        t = cpu->isar.id_aa64pfr1;
+        t = FIELD_DP64(t, ID_AA64PFR1, BT, 1);
+        cpu->isar.id_aa64pfr1 = t;
+
         t = cpu->isar.id_aa64mmfr1;
         t = cpu->isar.id_aa64mmfr1;
         t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* HPD */
         t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* HPD */
         t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1);
         t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1);

+ 52 - 28
target/arm/helper.c

@@ -7197,7 +7197,7 @@ uint32_t HELPER(rbit)(uint32_t x)
     return revbit32(x);
     return revbit32(x);
 }
 }
 
 
-#if defined(CONFIG_USER_ONLY)
+#ifdef CONFIG_USER_ONLY
 
 
 /* These should probably raise undefined insn exceptions.  */
 /* These should probably raise undefined insn exceptions.  */
 void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
 void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
@@ -9571,6 +9571,7 @@ void arm_cpu_do_interrupt(CPUState *cs)
         cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
         cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
     }
     }
 }
 }
+#endif /* !CONFIG_USER_ONLY */
 
 
 /* Return the exception level which controls this address translation regime */
 /* Return the exception level which controls this address translation regime */
 static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
 static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
@@ -9600,6 +9601,8 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
     }
     }
 }
 }
 
 
+#ifndef CONFIG_USER_ONLY
+
 /* Return the SCTLR value which controls this address translation regime */
 /* Return the SCTLR value which controls this address translation regime */
 static inline uint32_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx)
 static inline uint32_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx)
 {
 {
@@ -9655,6 +9658,22 @@ static inline bool regime_translation_big_endian(CPUARMState *env,
     return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
     return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
 }
 }
 
 
+/* Return the TTBR associated with this translation regime */
+static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx,
+                                   int ttbrn)
+{
+    if (mmu_idx == ARMMMUIdx_S2NS) {
+        return env->cp15.vttbr_el2;
+    }
+    if (ttbrn == 0) {
+        return env->cp15.ttbr0_el[regime_el(env, mmu_idx)];
+    } else {
+        return env->cp15.ttbr1_el[regime_el(env, mmu_idx)];
+    }
+}
+
+#endif /* !CONFIG_USER_ONLY */
+
 /* Return the TCR controlling this translation regime */
 /* Return the TCR controlling this translation regime */
 static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
 static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
 {
 {
@@ -9675,20 +9694,6 @@ static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
     return mmu_idx;
     return mmu_idx;
 }
 }
 
 
-/* Return the TTBR associated with this translation regime */
-static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx,
-                                   int ttbrn)
-{
-    if (mmu_idx == ARMMMUIdx_S2NS) {
-        return env->cp15.vttbr_el2;
-    }
-    if (ttbrn == 0) {
-        return env->cp15.ttbr0_el[regime_el(env, mmu_idx)];
-    } else {
-        return env->cp15.ttbr1_el[regime_el(env, mmu_idx)];
-    }
-}
-
 /* Return true if the translation regime is using LPAE format page tables */
 /* Return true if the translation regime is using LPAE format page tables */
 static inline bool regime_using_lpae_format(CPUARMState *env,
 static inline bool regime_using_lpae_format(CPUARMState *env,
                                             ARMMMUIdx mmu_idx)
                                             ARMMMUIdx mmu_idx)
@@ -9714,6 +9719,7 @@ bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
     return regime_using_lpae_format(env, mmu_idx);
     return regime_using_lpae_format(env, mmu_idx);
 }
 }
 
 
+#ifndef CONFIG_USER_ONLY
 static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
 static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
 {
 {
     switch (mmu_idx) {
     switch (mmu_idx) {
@@ -10419,6 +10425,7 @@ static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs)
 
 
     return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint;
     return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint;
 }
 }
+#endif /* !CONFIG_USER_ONLY */
 
 
 ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
 ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
                                         ARMMMUIdx mmu_idx)
                                         ARMMMUIdx mmu_idx)
@@ -10490,6 +10497,7 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
     return ret;
     return ret;
 }
 }
 
 
+#ifndef CONFIG_USER_ONLY
 static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
 static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
                                           ARMMMUIdx mmu_idx)
                                           ARMMMUIdx mmu_idx)
 {
 {
@@ -10577,6 +10585,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     bool ttbr1_valid;
     bool ttbr1_valid;
     uint64_t descaddrmask;
     uint64_t descaddrmask;
     bool aarch64 = arm_el_is_aa64(env, el);
     bool aarch64 = arm_el_is_aa64(env, el);
+    bool guarded = false;
 
 
     /* TODO:
     /* TODO:
      * This code does not handle the different format TCR for VTCR_EL2.
      * This code does not handle the different format TCR for VTCR_EL2.
@@ -10756,6 +10765,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         }
         }
         /* Merge in attributes from table descriptors */
         /* Merge in attributes from table descriptors */
         attrs |= nstable << 3; /* NS */
         attrs |= nstable << 3; /* NS */
+        guarded = extract64(descriptor, 50, 1);  /* GP */
         if (param.hpd) {
         if (param.hpd) {
             /* HPD disables all the table attributes except NSTable.  */
             /* HPD disables all the table attributes except NSTable.  */
             break;
             break;
@@ -10801,6 +10811,10 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
          */
          */
         txattrs->secure = false;
         txattrs->secure = false;
     }
     }
+    /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB.  */
+    if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) {
+        txattrs->target_tlb_bit0 = true;
+    }
 
 
     if (cacheattrs != NULL) {
     if (cacheattrs != NULL) {
         if (mmu_idx == ARMMMUIdx_S2NS) {
         if (mmu_idx == ARMMMUIdx_S2NS) {
@@ -12623,6 +12637,12 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
         val &= ~FPCR_FZ16;
         val &= ~FPCR_FZ16;
     }
     }
 
 
+    /*
+     * We don't implement trapped exception handling, so the
+     * trap enable bits are all RAZ/WI (not RES0!)
+     */
+    val &= ~(FPCR_IDE | FPCR_IXE | FPCR_UFE | FPCR_OFE | FPCR_DZE | FPCR_IOE);
+
     changed = env->vfp.xregs[ARM_VFP_FPSCR];
     changed = env->vfp.xregs[ARM_VFP_FPSCR];
     env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff);
     env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff);
     env->vfp.vec_len = (val >> 16) & 7;
     env->vfp.vec_len = (val >> 16) & 7;
@@ -13735,15 +13755,12 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
 
 
     if (is_a64(env)) {
     if (is_a64(env)) {
         ARMCPU *cpu = arm_env_get_cpu(env);
         ARMCPU *cpu = arm_env_get_cpu(env);
+        uint64_t sctlr;
 
 
         *pc = env->pc;
         *pc = env->pc;
         flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
         flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
 
 
-#ifndef CONFIG_USER_ONLY
-        /*
-         * Get control bits for tagged addresses.  Note that the
-         * translator only uses this for instruction addresses.
-         */
+        /* Get control bits for tagged addresses.  */
         {
         {
             ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
             ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
             ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
             ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
@@ -13760,8 +13777,8 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
             }
             }
 
 
             flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
             flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
+            flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
         }
         }
-#endif
 
 
         if (cpu_isar_feature(aa64_sve, cpu)) {
         if (cpu_isar_feature(aa64_sve, cpu)) {
             int sve_el = sve_exception_el(env, current_el);
             int sve_el = sve_exception_el(env, current_el);
@@ -13779,6 +13796,12 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
             flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
             flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
         }
         }
 
 
+        if (current_el == 0) {
+            /* FIXME: ARMv8.1-VHE S2 translation regime.  */
+            sctlr = env->cp15.sctlr_el[1];
+        } else {
+            sctlr = env->cp15.sctlr_el[current_el];
+        }
         if (cpu_isar_feature(aa64_pauth, cpu)) {
         if (cpu_isar_feature(aa64_pauth, cpu)) {
             /*
             /*
              * In order to save space in flags, we record only whether
              * In order to save space in flags, we record only whether
@@ -13786,17 +13809,18 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
              * a nop, or "active" when some action must be performed.
              * a nop, or "active" when some action must be performed.
              * The decision of which action to take is left to a helper.
              * The decision of which action to take is left to a helper.
              */
              */
-            uint64_t sctlr;
-            if (current_el == 0) {
-                /* FIXME: ARMv8.1-VHE S2 translation regime.  */
-                sctlr = env->cp15.sctlr_el[1];
-            } else {
-                sctlr = env->cp15.sctlr_el[current_el];
-            }
             if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
             if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
                 flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1);
                 flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1);
             }
             }
         }
         }
+
+        if (cpu_isar_feature(aa64_bti, cpu)) {
+            /* Note that SCTLR_EL[23].BT == SCTLR_BT1.  */
+            if (sctlr & (current_el == 0 ? SCTLR_BT0 : SCTLR_BT1)) {
+                flags = FIELD_DP32(flags, TBFLAG_A64, BT, 1);
+            }
+            flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
+        }
     } else {
     } else {
         *pc = env->regs[15];
         *pc = env->regs[15];
         flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
         flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);

+ 6 - 21
target/arm/internals.h

@@ -268,6 +268,7 @@ enum arm_exception_class {
     EC_FPIDTRAP               = 0x08,
     EC_FPIDTRAP               = 0x08,
     EC_PACTRAP                = 0x09,
     EC_PACTRAP                = 0x09,
     EC_CP14RRTTRAP            = 0x0c,
     EC_CP14RRTTRAP            = 0x0c,
+    EC_BTITRAP                = 0x0d,
     EC_ILLEGALSTATE           = 0x0e,
     EC_ILLEGALSTATE           = 0x0e,
     EC_AA32_SVC               = 0x11,
     EC_AA32_SVC               = 0x11,
     EC_AA32_HVC               = 0x12,
     EC_AA32_HVC               = 0x12,
@@ -439,6 +440,11 @@ static inline uint32_t syn_pactrap(void)
     return EC_PACTRAP << ARM_EL_EC_SHIFT;
     return EC_PACTRAP << ARM_EL_EC_SHIFT;
 }
 }
 
 
+static inline uint32_t syn_btitrap(int btype)
+{
+    return (EC_BTITRAP << ARM_EL_EC_SHIFT) | btype;
+}
+
 static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
 static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
 {
 {
     return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
     return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
@@ -957,30 +963,9 @@ typedef struct ARMVAParameters {
     bool using64k   : 1;
     bool using64k   : 1;
 } ARMVAParameters;
 } ARMVAParameters;
 
 
-#ifdef CONFIG_USER_ONLY
-static inline ARMVAParameters aa64_va_parameters_both(CPUARMState *env,
-                                                      uint64_t va,
-                                                      ARMMMUIdx mmu_idx)
-{
-    return (ARMVAParameters) {
-        /* 48-bit address space */
-        .tsz = 16,
-        /* We can't handle tagged addresses properly in user-only mode */
-        .tbi = false,
-    };
-}
-
-static inline ARMVAParameters aa64_va_parameters(CPUARMState *env,
-                                                 uint64_t va,
-                                                 ARMMMUIdx mmu_idx, bool data)
-{
-    return aa64_va_parameters_both(env, va, mmu_idx);
-}
-#else
 ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
 ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
                                         ARMMMUIdx mmu_idx);
                                         ARMMMUIdx mmu_idx);
 ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
 ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
                                    ARMMMUIdx mmu_idx, bool data);
                                    ARMMMUIdx mmu_idx, bool data);
-#endif
 
 
 #endif
 #endif

+ 337 - 137
target/arm/translate-a64.c

@@ -128,6 +128,29 @@ static inline int get_a64_user_mem_index(DisasContext *s)
     return arm_to_core_mmu_idx(useridx);
     return arm_to_core_mmu_idx(useridx);
 }
 }
 
 
+static void reset_btype(DisasContext *s)
+{
+    if (s->btype != 0) {
+        TCGv_i32 zero = tcg_const_i32(0);
+        tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
+        tcg_temp_free_i32(zero);
+        s->btype = 0;
+    }
+}
+
+static void set_btype(DisasContext *s, int val)
+{
+    TCGv_i32 tcg_val;
+
+    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
+    tcg_debug_assert(val >= 1 && val <= 3);
+
+    tcg_val = tcg_const_i32(val);
+    tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
+    tcg_temp_free_i32(tcg_val);
+    s->btype = -1;
+}
+
 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                             fprintf_function cpu_fprintf, int flags)
                             fprintf_function cpu_fprintf, int flags)
 {
 {
@@ -163,6 +186,9 @@ void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                 el,
                 el,
                 psr & PSTATE_SP ? 'h' : 't');
                 psr & PSTATE_SP ? 'h' : 't');
 
 
+    if (cpu_isar_feature(aa64_bti, cpu)) {
+        cpu_fprintf(f, "  BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
+    }
     if (!(flags & CPU_DUMP_FPU)) {
     if (!(flags & CPU_DUMP_FPU)) {
         cpu_fprintf(f, "\n");
         cpu_fprintf(f, "\n");
         return;
         return;
@@ -258,10 +284,10 @@ void gen_a64_set_pc_im(uint64_t val)
     tcg_gen_movi_i64(cpu_pc, val);
     tcg_gen_movi_i64(cpu_pc, val);
 }
 }
 
 
-/* Load the PC from a generic TCG variable.
+/*
+ * Handle Top Byte Ignore (TBI) bits.
  *
  *
- * If address tagging is enabled via the TCR TBI bits, then loading
- * an address into the PC will clear out any tag in it:
+ * If address tagging is enabled via the TCR TBI bits:
  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
  *    then the address is zero-extended, clearing bits [63:56]
  *    then the address is zero-extended, clearing bits [63:56]
  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
@@ -269,45 +295,56 @@ void gen_a64_set_pc_im(uint64_t val)
  *    If the appropriate TBI bit is set for the address then
  *    If the appropriate TBI bit is set for the address then
  *    the address is sign-extended from bit 55 into bits [63:56]
  *    the address is sign-extended from bit 55 into bits [63:56]
  *
  *
- * We can avoid doing this for relative-branches, because the
- * PC + offset can never overflow into the tag bits (assuming
- * that virtual addresses are less than 56 bits wide, as they
- * are currently), but we must handle it for branch-to-register.
+ * Here we have concatenated TBI{1,0} into tbi.
  */
  */
-static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
+static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
+                                TCGv_i64 src, int tbi)
 {
 {
-    /* Note that TBII is TBI1:TBI0.  */
-    int tbi = s->tbii;
-
-    if (s->current_el <= 1) {
-        if (tbi != 0) {
-            /* Sign-extend from bit 55.  */
-            tcg_gen_sextract_i64(cpu_pc, src, 0, 56);
+    if (tbi == 0) {
+        /* Load unmodified address */
+        tcg_gen_mov_i64(dst, src);
+    } else if (s->current_el >= 2) {
+        /* FIXME: ARMv8.1-VHE S2 translation regime.  */
+        /* Force tag byte to all zero */
+        tcg_gen_extract_i64(dst, src, 0, 56);
+    } else {
+        /* Sign-extend from bit 55.  */
+        tcg_gen_sextract_i64(dst, src, 0, 56);
 
 
-            if (tbi != 3) {
-                TCGv_i64 tcg_zero = tcg_const_i64(0);
+        if (tbi != 3) {
+            TCGv_i64 tcg_zero = tcg_const_i64(0);
 
 
-                /*
-                 * The two TBI bits differ.
-                 * If tbi0, then !tbi1: only use the extension if positive.
-                 * if !tbi0, then tbi1: only use the extension if negative.
-                 */
-                tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
-                                    cpu_pc, cpu_pc, tcg_zero, cpu_pc, src);
-                tcg_temp_free_i64(tcg_zero);
-            }
-            return;
-        }
-    } else {
-        if (tbi != 0) {
-            /* Force tag byte to all zero */
-            tcg_gen_extract_i64(cpu_pc, src, 0, 56);
-            return;
+            /*
+             * The two TBI bits differ.
+             * If tbi0, then !tbi1: only use the extension if positive.
+             * If !tbi0, then tbi1: only use the extension if negative.
+             */
+            tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
+                                dst, dst, tcg_zero, dst, src);
+            tcg_temp_free_i64(tcg_zero);
         }
         }
     }
     }
+}
 
 
-    /* Load unmodified address */
-    tcg_gen_mov_i64(cpu_pc, src);
+static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
+{
+    /*
+     * If address tagging is enabled for instructions via the TCR TBI bits,
+     * then loading an address into the PC will clear out any tag.
+     */
+    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
+}
+
+/*
+ * Return a "clean" address for ADDR according to TBID.
+ * This is always a fresh temporary, as we need to be able to
+ * increment this independently of a dirty write-back address.
+ */
+static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
+{
+    TCGv_i64 clean = new_tmp_a64(s);
+    gen_top_byte_ignore(s, clean, addr, s->tbid);
+    return clean;
 }
 }
 
 
 typedef struct DisasCompare64 {
 typedef struct DisasCompare64 {
@@ -1349,6 +1386,7 @@ static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
     }
     }
 
 
     /* B Branch / BL Branch with link */
     /* B Branch / BL Branch with link */
+    reset_btype(s);
     gen_goto_tb(s, 0, addr);
     gen_goto_tb(s, 0, addr);
 }
 }
 
 
@@ -1373,6 +1411,7 @@ static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
     tcg_cmp = read_cpu_reg(s, rt, sf);
     tcg_cmp = read_cpu_reg(s, rt, sf);
     label_match = gen_new_label();
     label_match = gen_new_label();
 
 
+    reset_btype(s);
     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                         tcg_cmp, 0, label_match);
                         tcg_cmp, 0, label_match);
 
 
@@ -1402,6 +1441,8 @@ static void disas_test_b_imm(DisasContext *s, uint32_t insn)
     tcg_cmp = tcg_temp_new_i64();
     tcg_cmp = tcg_temp_new_i64();
     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
     label_match = gen_new_label();
     label_match = gen_new_label();
+
+    reset_btype(s);
     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                         tcg_cmp, 0, label_match);
                         tcg_cmp, 0, label_match);
     tcg_temp_free_i64(tcg_cmp);
     tcg_temp_free_i64(tcg_cmp);
@@ -1428,6 +1469,7 @@ static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
     cond = extract32(insn, 0, 4);
     cond = extract32(insn, 0, 4);
 
 
+    reset_btype(s);
     if (cond < 0x0e) {
     if (cond < 0x0e) {
         /* genuinely conditional branches */
         /* genuinely conditional branches */
         TCGLabel *label_match = gen_new_label();
         TCGLabel *label_match = gen_new_label();
@@ -1592,6 +1634,7 @@ static void handle_sync(DisasContext *s, uint32_t insn,
          * a self-modified code correctly and also to take
          * a self-modified code correctly and also to take
          * any pending interrupts immediately.
          * any pending interrupts immediately.
          */
          */
+        reset_btype(s);
         gen_goto_tb(s, 0, s->pc);
         gen_goto_tb(s, 0, s->pc);
         return;
         return;
     default:
     default:
@@ -1963,6 +2006,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
 {
 {
     unsigned int opc, op2, op3, rn, op4;
     unsigned int opc, op2, op3, rn, op4;
+    unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
     TCGv_i64 dst;
     TCGv_i64 dst;
     TCGv_i64 modifier;
     TCGv_i64 modifier;
 
 
@@ -1980,6 +2024,7 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
     case 0: /* BR */
     case 0: /* BR */
     case 1: /* BLR */
     case 1: /* BLR */
     case 2: /* RET */
     case 2: /* RET */
+        btype_mod = opc;
         switch (op3) {
         switch (op3) {
         case 0:
         case 0:
             /* BR, BLR, RET */
             /* BR, BLR, RET */
@@ -2023,7 +2068,6 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
         default:
         default:
             goto do_unallocated;
             goto do_unallocated;
         }
         }
-
         gen_a64_set_pc(s, dst);
         gen_a64_set_pc(s, dst);
         /* BLR also needs to load return address */
         /* BLR also needs to load return address */
         if (opc == 1) {
         if (opc == 1) {
@@ -2039,6 +2083,7 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
         if ((op3 & ~1) != 2) {
         if ((op3 & ~1) != 2) {
             goto do_unallocated;
             goto do_unallocated;
         }
         }
+        btype_mod = opc & 1;
         if (s->pauth_active) {
         if (s->pauth_active) {
             dst = new_tmp_a64(s);
             dst = new_tmp_a64(s);
             modifier = cpu_reg_sp(s, op4);
             modifier = cpu_reg_sp(s, op4);
@@ -2122,6 +2167,26 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
         return;
         return;
     }
     }
 
 
+    switch (btype_mod) {
+    case 0: /* BR */
+        if (dc_isar_feature(aa64_bti, s)) {
+            /* BR to {x16,x17} or !guard -> 1, else 3.  */
+            set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
+        }
+        break;
+
+    case 1: /* BLR */
+        if (dc_isar_feature(aa64_bti, s)) {
+            /* BLR sets BTYPE to 2, regardless of source guarded page.  */
+            set_btype(s, 2);
+        }
+        break;
+
+    default: /* RET or none of the above.  */
+        /* BTYPE will be set to 0 by normal end-of-insn processing.  */
+        break;
+    }
+
     s->base.is_jmp = DISAS_JUMP;
     s->base.is_jmp = DISAS_JUMP;
 }
 }
 
 
@@ -2294,12 +2359,13 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
     TCGv_i64 tcg_rs = cpu_reg(s, rs);
     TCGv_i64 tcg_rs = cpu_reg(s, rs);
     TCGv_i64 tcg_rt = cpu_reg(s, rt);
     TCGv_i64 tcg_rt = cpu_reg(s, rt);
     int memidx = get_mem_index(s);
     int memidx = get_mem_index(s);
-    TCGv_i64 addr = cpu_reg_sp(s, rn);
+    TCGv_i64 clean_addr;
 
 
     if (rn == 31) {
     if (rn == 31) {
         gen_check_sp_alignment(s);
         gen_check_sp_alignment(s);
     }
     }
-    tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
+    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+    tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
                                size | MO_ALIGN | s->be_data);
                                size | MO_ALIGN | s->be_data);
 }
 }
 
 
@@ -2310,12 +2376,13 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
     TCGv_i64 s2 = cpu_reg(s, rs + 1);
     TCGv_i64 s2 = cpu_reg(s, rs + 1);
     TCGv_i64 t1 = cpu_reg(s, rt);
     TCGv_i64 t1 = cpu_reg(s, rt);
     TCGv_i64 t2 = cpu_reg(s, rt + 1);
     TCGv_i64 t2 = cpu_reg(s, rt + 1);
-    TCGv_i64 addr = cpu_reg_sp(s, rn);
+    TCGv_i64 clean_addr;
     int memidx = get_mem_index(s);
     int memidx = get_mem_index(s);
 
 
     if (rn == 31) {
     if (rn == 31) {
         gen_check_sp_alignment(s);
         gen_check_sp_alignment(s);
     }
     }
+    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
 
 
     if (size == 2) {
     if (size == 2) {
         TCGv_i64 cmp = tcg_temp_new_i64();
         TCGv_i64 cmp = tcg_temp_new_i64();
@@ -2329,7 +2396,7 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
             tcg_gen_concat32_i64(cmp, s2, s1);
             tcg_gen_concat32_i64(cmp, s2, s1);
         }
         }
 
 
-        tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
+        tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
                                    MO_64 | MO_ALIGN | s->be_data);
                                    MO_64 | MO_ALIGN | s->be_data);
         tcg_temp_free_i64(val);
         tcg_temp_free_i64(val);
 
 
@@ -2343,9 +2410,11 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
         if (HAVE_CMPXCHG128) {
         if (HAVE_CMPXCHG128) {
             TCGv_i32 tcg_rs = tcg_const_i32(rs);
             TCGv_i32 tcg_rs = tcg_const_i32(rs);
             if (s->be_data == MO_LE) {
             if (s->be_data == MO_LE) {
-                gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+                gen_helper_casp_le_parallel(cpu_env, tcg_rs,
+                                            clean_addr, t1, t2);
             } else {
             } else {
-                gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+                gen_helper_casp_be_parallel(cpu_env, tcg_rs,
+                                            clean_addr, t1, t2);
             }
             }
             tcg_temp_free_i32(tcg_rs);
             tcg_temp_free_i32(tcg_rs);
         } else {
         } else {
@@ -2361,10 +2430,10 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
         TCGv_i64 zero = tcg_const_i64(0);
         TCGv_i64 zero = tcg_const_i64(0);
 
 
         /* Load the two words, in memory order.  */
         /* Load the two words, in memory order.  */
-        tcg_gen_qemu_ld_i64(d1, addr, memidx,
+        tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
                             MO_64 | MO_ALIGN_16 | s->be_data);
                             MO_64 | MO_ALIGN_16 | s->be_data);
-        tcg_gen_addi_i64(a2, addr, 8);
-        tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data);
+        tcg_gen_addi_i64(a2, clean_addr, 8);
+        tcg_gen_qemu_ld_i64(d2, clean_addr, memidx, MO_64 | s->be_data);
 
 
         /* Compare the two words, also in memory order.  */
         /* Compare the two words, also in memory order.  */
         tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
         tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
@@ -2374,7 +2443,7 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
         /* If compare equal, write back new data, else write back old data.  */
         /* If compare equal, write back new data, else write back old data.  */
         tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
         tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
         tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
         tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
-        tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
+        tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
         tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
         tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
         tcg_temp_free_i64(a2);
         tcg_temp_free_i64(a2);
         tcg_temp_free_i64(c1);
         tcg_temp_free_i64(c1);
@@ -2427,7 +2496,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
     int is_lasr = extract32(insn, 15, 1);
     int is_lasr = extract32(insn, 15, 1);
     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
     int size = extract32(insn, 30, 2);
     int size = extract32(insn, 30, 2);
-    TCGv_i64 tcg_addr;
+    TCGv_i64 clean_addr;
 
 
     switch (o2_L_o1_o0) {
     switch (o2_L_o1_o0) {
     case 0x0: /* STXR */
     case 0x0: /* STXR */
@@ -2438,8 +2507,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
         if (is_lasr) {
         if (is_lasr) {
             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
         }
         }
-        tcg_addr = read_cpu_reg_sp(s, rn, 1);
-        gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
+        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+        gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
         return;
         return;
 
 
     case 0x4: /* LDXR */
     case 0x4: /* LDXR */
@@ -2447,9 +2516,9 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
         if (rn == 31) {
         if (rn == 31) {
             gen_check_sp_alignment(s);
             gen_check_sp_alignment(s);
         }
         }
-        tcg_addr = read_cpu_reg_sp(s, rn, 1);
+        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
         s->is_ldex = true;
         s->is_ldex = true;
-        gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
+        gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
         if (is_lasr) {
         if (is_lasr) {
             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
         }
         }
@@ -2467,8 +2536,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
             gen_check_sp_alignment(s);
             gen_check_sp_alignment(s);
         }
         }
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
-        tcg_addr = read_cpu_reg_sp(s, rn, 1);
-        do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
+        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+        do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
         return;
         return;
 
 
@@ -2483,8 +2552,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
         if (rn == 31) {
         if (rn == 31) {
             gen_check_sp_alignment(s);
             gen_check_sp_alignment(s);
         }
         }
-        tcg_addr = read_cpu_reg_sp(s, rn, 1);
-        do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
+        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
         return;
         return;
@@ -2497,8 +2566,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
             if (is_lasr) {
             if (is_lasr) {
                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
             }
             }
-            tcg_addr = read_cpu_reg_sp(s, rn, 1);
-            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
+            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+            gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
             return;
             return;
         }
         }
         if (rt2 == 31
         if (rt2 == 31
@@ -2515,9 +2584,9 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
             if (rn == 31) {
             if (rn == 31) {
                 gen_check_sp_alignment(s);
                 gen_check_sp_alignment(s);
             }
             }
-            tcg_addr = read_cpu_reg_sp(s, rn, 1);
+            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
             s->is_ldex = true;
             s->is_ldex = true;
-            gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
+            gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
             if (is_lasr) {
             if (is_lasr) {
                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
             }
             }
@@ -2566,7 +2635,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
     int opc = extract32(insn, 30, 2);
     int opc = extract32(insn, 30, 2);
     bool is_signed = false;
     bool is_signed = false;
     int size = 2;
     int size = 2;
-    TCGv_i64 tcg_rt, tcg_addr;
+    TCGv_i64 tcg_rt, clean_addr;
 
 
     if (is_vector) {
     if (is_vector) {
         if (opc == 3) {
         if (opc == 3) {
@@ -2588,17 +2657,17 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
 
 
     tcg_rt = cpu_reg(s, rt);
     tcg_rt = cpu_reg(s, rt);
 
 
-    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
+    clean_addr = tcg_const_i64((s->pc - 4) + imm);
     if (is_vector) {
     if (is_vector) {
-        do_fp_ld(s, rt, tcg_addr, size);
+        do_fp_ld(s, rt, clean_addr, size);
     } else {
     } else {
         /* Only unsigned 32bit loads target 32bit registers.  */
         /* Only unsigned 32bit loads target 32bit registers.  */
         bool iss_sf = opc != 0;
         bool iss_sf = opc != 0;
 
 
-        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
+        do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
                   true, rt, iss_sf, false);
                   true, rt, iss_sf, false);
     }
     }
-    tcg_temp_free_i64(tcg_addr);
+    tcg_temp_free_i64(clean_addr);
 }
 }
 
 
 /*
 /*
@@ -2644,7 +2713,8 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
     bool postindex = false;
     bool postindex = false;
     bool wback = false;
     bool wback = false;
 
 
-    TCGv_i64 tcg_addr; /* calculated address */
+    TCGv_i64 clean_addr, dirty_addr;
+
     int size;
     int size;
 
 
     if (opc == 3) {
     if (opc == 3) {
@@ -2700,23 +2770,23 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
         gen_check_sp_alignment(s);
         gen_check_sp_alignment(s);
     }
     }
 
 
-    tcg_addr = read_cpu_reg_sp(s, rn, 1);
-
+    dirty_addr = read_cpu_reg_sp(s, rn, 1);
     if (!postindex) {
     if (!postindex) {
-        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
+        tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
     }
     }
+    clean_addr = clean_data_tbi(s, dirty_addr);
 
 
     if (is_vector) {
     if (is_vector) {
         if (is_load) {
         if (is_load) {
-            do_fp_ld(s, rt, tcg_addr, size);
+            do_fp_ld(s, rt, clean_addr, size);
         } else {
         } else {
-            do_fp_st(s, rt, tcg_addr, size);
+            do_fp_st(s, rt, clean_addr, size);
         }
         }
-        tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
+        tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
         if (is_load) {
         if (is_load) {
-            do_fp_ld(s, rt2, tcg_addr, size);
+            do_fp_ld(s, rt2, clean_addr, size);
         } else {
         } else {
-            do_fp_st(s, rt2, tcg_addr, size);
+            do_fp_st(s, rt2, clean_addr, size);
         }
         }
     } else {
     } else {
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
@@ -2728,30 +2798,28 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
             /* Do not modify tcg_rt before recognizing any exception
             /* Do not modify tcg_rt before recognizing any exception
              * from the second load.
              * from the second load.
              */
              */
-            do_gpr_ld(s, tmp, tcg_addr, size, is_signed, false,
+            do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
                       false, 0, false, false);
                       false, 0, false, false);
-            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
-            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
+            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
+            do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
                       false, 0, false, false);
                       false, 0, false, false);
 
 
             tcg_gen_mov_i64(tcg_rt, tmp);
             tcg_gen_mov_i64(tcg_rt, tmp);
             tcg_temp_free_i64(tmp);
             tcg_temp_free_i64(tmp);
         } else {
         } else {
-            do_gpr_st(s, tcg_rt, tcg_addr, size,
+            do_gpr_st(s, tcg_rt, clean_addr, size,
                       false, 0, false, false);
                       false, 0, false, false);
-            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
-            do_gpr_st(s, tcg_rt2, tcg_addr, size,
+            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
+            do_gpr_st(s, tcg_rt2, clean_addr, size,
                       false, 0, false, false);
                       false, 0, false, false);
         }
         }
     }
     }
 
 
     if (wback) {
     if (wback) {
         if (postindex) {
         if (postindex) {
-            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
-        } else {
-            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
+            tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
         }
         }
-        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
+        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
     }
     }
 }
 }
 
 
@@ -2788,7 +2856,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
     bool post_index;
     bool post_index;
     bool writeback;
     bool writeback;
 
 
-    TCGv_i64 tcg_addr;
+    TCGv_i64 clean_addr, dirty_addr;
 
 
     if (is_vector) {
     if (is_vector) {
         size |= (opc & 2) << 1;
         size |= (opc & 2) << 1;
@@ -2839,17 +2907,18 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
     if (rn == 31) {
     if (rn == 31) {
         gen_check_sp_alignment(s);
         gen_check_sp_alignment(s);
     }
     }
-    tcg_addr = read_cpu_reg_sp(s, rn, 1);
 
 
+    dirty_addr = read_cpu_reg_sp(s, rn, 1);
     if (!post_index) {
     if (!post_index) {
-        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
+        tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
     }
     }
+    clean_addr = clean_data_tbi(s, dirty_addr);
 
 
     if (is_vector) {
     if (is_vector) {
         if (is_store) {
         if (is_store) {
-            do_fp_st(s, rt, tcg_addr, size);
+            do_fp_st(s, rt, clean_addr, size);
         } else {
         } else {
-            do_fp_ld(s, rt, tcg_addr, size);
+            do_fp_ld(s, rt, clean_addr, size);
         }
         }
     } else {
     } else {
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
@@ -2857,10 +2926,10 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
 
 
         if (is_store) {
         if (is_store) {
-            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
+            do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
                              iss_valid, rt, iss_sf, false);
                              iss_valid, rt, iss_sf, false);
         } else {
         } else {
-            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
+            do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
                              is_signed, is_extended, memidx,
                              is_signed, is_extended, memidx,
                              iss_valid, rt, iss_sf, false);
                              iss_valid, rt, iss_sf, false);
         }
         }
@@ -2869,9 +2938,9 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
     if (writeback) {
     if (writeback) {
         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
         if (post_index) {
         if (post_index) {
-            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
+            tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
         }
         }
-        tcg_gen_mov_i64(tcg_rn, tcg_addr);
+        tcg_gen_mov_i64(tcg_rn, dirty_addr);
     }
     }
 }
 }
 
 
@@ -2910,8 +2979,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
     bool is_store = false;
     bool is_store = false;
     bool is_extended = false;
     bool is_extended = false;
 
 
-    TCGv_i64 tcg_rm;
-    TCGv_i64 tcg_addr;
+    TCGv_i64 tcg_rm, clean_addr, dirty_addr;
 
 
     if (extract32(opt, 1, 1) == 0) {
     if (extract32(opt, 1, 1) == 0) {
         unallocated_encoding(s);
         unallocated_encoding(s);
@@ -2945,27 +3013,28 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
     if (rn == 31) {
     if (rn == 31) {
         gen_check_sp_alignment(s);
         gen_check_sp_alignment(s);
     }
     }
-    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+    dirty_addr = read_cpu_reg_sp(s, rn, 1);
 
 
     tcg_rm = read_cpu_reg(s, rm, 1);
     tcg_rm = read_cpu_reg(s, rm, 1);
     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
 
 
-    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
+    tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
+    clean_addr = clean_data_tbi(s, dirty_addr);
 
 
     if (is_vector) {
     if (is_vector) {
         if (is_store) {
         if (is_store) {
-            do_fp_st(s, rt, tcg_addr, size);
+            do_fp_st(s, rt, clean_addr, size);
         } else {
         } else {
-            do_fp_ld(s, rt, tcg_addr, size);
+            do_fp_ld(s, rt, clean_addr, size);
         }
         }
     } else {
     } else {
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
         if (is_store) {
         if (is_store) {
-            do_gpr_st(s, tcg_rt, tcg_addr, size,
+            do_gpr_st(s, tcg_rt, clean_addr, size,
                       true, rt, iss_sf, false);
                       true, rt, iss_sf, false);
         } else {
         } else {
-            do_gpr_ld(s, tcg_rt, tcg_addr, size,
+            do_gpr_ld(s, tcg_rt, clean_addr, size,
                       is_signed, is_extended,
                       is_signed, is_extended,
                       true, rt, iss_sf, false);
                       true, rt, iss_sf, false);
         }
         }
@@ -2999,7 +3068,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
     unsigned int imm12 = extract32(insn, 10, 12);
     unsigned int imm12 = extract32(insn, 10, 12);
     unsigned int offset;
     unsigned int offset;
 
 
-    TCGv_i64 tcg_addr;
+    TCGv_i64 clean_addr, dirty_addr;
 
 
     bool is_store;
     bool is_store;
     bool is_signed = false;
     bool is_signed = false;
@@ -3032,24 +3101,25 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
     if (rn == 31) {
     if (rn == 31) {
         gen_check_sp_alignment(s);
         gen_check_sp_alignment(s);
     }
     }
-    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+    dirty_addr = read_cpu_reg_sp(s, rn, 1);
     offset = imm12 << size;
     offset = imm12 << size;
-    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
+    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
+    clean_addr = clean_data_tbi(s, dirty_addr);
 
 
     if (is_vector) {
     if (is_vector) {
         if (is_store) {
         if (is_store) {
-            do_fp_st(s, rt, tcg_addr, size);
+            do_fp_st(s, rt, clean_addr, size);
         } else {
         } else {
-            do_fp_ld(s, rt, tcg_addr, size);
+            do_fp_ld(s, rt, clean_addr, size);
         }
         }
     } else {
     } else {
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
         TCGv_i64 tcg_rt = cpu_reg(s, rt);
         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
         if (is_store) {
         if (is_store) {
-            do_gpr_st(s, tcg_rt, tcg_addr, size,
+            do_gpr_st(s, tcg_rt, clean_addr, size,
                       true, rt, iss_sf, false);
                       true, rt, iss_sf, false);
         } else {
         } else {
-            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
+            do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended,
                       true, rt, iss_sf, false);
                       true, rt, iss_sf, false);
         }
         }
     }
     }
@@ -3075,7 +3145,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     int rs = extract32(insn, 16, 5);
     int rs = extract32(insn, 16, 5);
     int rn = extract32(insn, 5, 5);
     int rn = extract32(insn, 5, 5);
     int o3_opc = extract32(insn, 12, 4);
     int o3_opc = extract32(insn, 12, 4);
-    TCGv_i64 tcg_rn, tcg_rs;
+    TCGv_i64 tcg_rs, clean_addr;
     AtomicThreeOpFn *fn;
     AtomicThreeOpFn *fn;
 
 
     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
@@ -3118,7 +3188,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     if (rn == 31) {
     if (rn == 31) {
         gen_check_sp_alignment(s);
         gen_check_sp_alignment(s);
     }
     }
-    tcg_rn = cpu_reg_sp(s, rn);
+    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
     tcg_rs = read_cpu_reg(s, rs, true);
     tcg_rs = read_cpu_reg(s, rs, true);
 
 
     if (o3_opc == 1) { /* LDCLR */
     if (o3_opc == 1) { /* LDCLR */
@@ -3128,7 +3198,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     /* The tcg atomic primitives are all full barriers.  Therefore we
     /* The tcg atomic primitives are all full barriers.  Therefore we
      * can ignore the Acquire and Release bits of this instruction.
      * can ignore the Acquire and Release bits of this instruction.
      */
      */
-    fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
+    fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s),
        s->be_data | size | MO_ALIGN);
        s->be_data | size | MO_ALIGN);
 }
 }
 
 
@@ -3154,7 +3224,7 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn,
     bool is_wback = extract32(insn, 11, 1);
     bool is_wback = extract32(insn, 11, 1);
     bool use_key_a = !extract32(insn, 23, 1);
     bool use_key_a = !extract32(insn, 23, 1);
     int offset;
     int offset;
-    TCGv_i64 tcg_addr, tcg_rt;
+    TCGv_i64 clean_addr, dirty_addr, tcg_rt;
 
 
     if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
     if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
         unallocated_encoding(s);
         unallocated_encoding(s);
@@ -3164,29 +3234,31 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn,
     if (rn == 31) {
     if (rn == 31) {
         gen_check_sp_alignment(s);
         gen_check_sp_alignment(s);
     }
     }
-    tcg_addr = read_cpu_reg_sp(s, rn, 1);
+    dirty_addr = read_cpu_reg_sp(s, rn, 1);
 
 
     if (s->pauth_active) {
     if (s->pauth_active) {
         if (use_key_a) {
         if (use_key_a) {
-            gen_helper_autda(tcg_addr, cpu_env, tcg_addr, cpu_X[31]);
+            gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
         } else {
         } else {
-            gen_helper_autdb(tcg_addr, cpu_env, tcg_addr, cpu_X[31]);
+            gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
         }
         }
     }
     }
 
 
     /* Form the 10-bit signed, scaled offset.  */
     /* Form the 10-bit signed, scaled offset.  */
     offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
     offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
     offset = sextract32(offset << size, 0, 10 + size);
     offset = sextract32(offset << size, 0, 10 + size);
-    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
+    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
 
 
-    tcg_rt = cpu_reg(s, rt);
+    /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
+    clean_addr = clean_data_tbi(s, dirty_addr);
 
 
-    do_gpr_ld(s, tcg_rt, tcg_addr, size, /* is_signed */ false,
+    tcg_rt = cpu_reg(s, rt);
+    do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
               /* extend */ false, /* iss_valid */ !is_wback,
               /* extend */ false, /* iss_valid */ !is_wback,
               /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
               /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
 
 
     if (is_wback) {
     if (is_wback) {
-        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
+        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
     }
     }
 }
 }
 
 
@@ -3255,7 +3327,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
     bool is_store = !extract32(insn, 22, 1);
     bool is_store = !extract32(insn, 22, 1);
     bool is_postidx = extract32(insn, 23, 1);
     bool is_postidx = extract32(insn, 23, 1);
     bool is_q = extract32(insn, 30, 1);
     bool is_q = extract32(insn, 30, 1);
-    TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
+    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
     TCGMemOp endian = s->be_data;
     TCGMemOp endian = s->be_data;
 
 
     int ebytes;   /* bytes per element */
     int ebytes;   /* bytes per element */
@@ -3338,8 +3410,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
     elements = (is_q ? 16 : 8) / ebytes;
     elements = (is_q ? 16 : 8) / ebytes;
 
 
     tcg_rn = cpu_reg_sp(s, rn);
     tcg_rn = cpu_reg_sp(s, rn);
-    tcg_addr = tcg_temp_new_i64();
-    tcg_gen_mov_i64(tcg_addr, tcg_rn);
+    clean_addr = clean_data_tbi(s, tcg_rn);
     tcg_ebytes = tcg_const_i64(ebytes);
     tcg_ebytes = tcg_const_i64(ebytes);
 
 
     for (r = 0; r < rpt; r++) {
     for (r = 0; r < rpt; r++) {
@@ -3349,14 +3420,15 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
             for (xs = 0; xs < selem; xs++) {
             for (xs = 0; xs < selem; xs++) {
                 int tt = (rt + r + xs) % 32;
                 int tt = (rt + r + xs) % 32;
                 if (is_store) {
                 if (is_store) {
-                    do_vec_st(s, tt, e, tcg_addr, size, endian);
+                    do_vec_st(s, tt, e, clean_addr, size, endian);
                 } else {
                 } else {
-                    do_vec_ld(s, tt, e, tcg_addr, size, endian);
+                    do_vec_ld(s, tt, e, clean_addr, size, endian);
                 }
                 }
-                tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
+                tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
             }
             }
         }
         }
     }
     }
+    tcg_temp_free_i64(tcg_ebytes);
 
 
     if (!is_store) {
     if (!is_store) {
         /* For non-quad operations, setting a slice of the low
         /* For non-quad operations, setting a slice of the low
@@ -3374,13 +3446,11 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
 
 
     if (is_postidx) {
     if (is_postidx) {
         if (rm == 31) {
         if (rm == 31) {
-            tcg_gen_mov_i64(tcg_rn, tcg_addr);
+            tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes);
         } else {
         } else {
             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
         }
         }
     }
     }
-    tcg_temp_free_i64(tcg_ebytes);
-    tcg_temp_free_i64(tcg_addr);
 }
 }
 
 
 /* AdvSIMD load/store single structure
 /* AdvSIMD load/store single structure
@@ -3423,7 +3493,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
     bool replicate = false;
     bool replicate = false;
     int index = is_q << 3 | S << 2 | size;
     int index = is_q << 3 | S << 2 | size;
     int ebytes, xs;
     int ebytes, xs;
-    TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
+    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
 
 
     if (extract32(insn, 31, 1)) {
     if (extract32(insn, 31, 1)) {
         unallocated_encoding(s);
         unallocated_encoding(s);
@@ -3483,8 +3553,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
     }
     }
 
 
     tcg_rn = cpu_reg_sp(s, rn);
     tcg_rn = cpu_reg_sp(s, rn);
-    tcg_addr = tcg_temp_new_i64();
-    tcg_gen_mov_i64(tcg_addr, tcg_rn);
+    clean_addr = clean_data_tbi(s, tcg_rn);
     tcg_ebytes = tcg_const_i64(ebytes);
     tcg_ebytes = tcg_const_i64(ebytes);
 
 
     for (xs = 0; xs < selem; xs++) {
     for (xs = 0; xs < selem; xs++) {
@@ -3492,7 +3561,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
             /* Load and replicate to all elements */
             /* Load and replicate to all elements */
             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 
 
-            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
+            tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr,
                                 get_mem_index(s), s->be_data + scale);
                                 get_mem_index(s), s->be_data + scale);
             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
                                  (is_q + 1) * 8, vec_full_reg_size(s),
                                  (is_q + 1) * 8, vec_full_reg_size(s),
@@ -3501,24 +3570,23 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
         } else {
         } else {
             /* Load/store one element per register */
             /* Load/store one element per register */
             if (is_load) {
             if (is_load) {
-                do_vec_ld(s, rt, index, tcg_addr, scale, s->be_data);
+                do_vec_ld(s, rt, index, clean_addr, scale, s->be_data);
             } else {
             } else {
-                do_vec_st(s, rt, index, tcg_addr, scale, s->be_data);
+                do_vec_st(s, rt, index, clean_addr, scale, s->be_data);
             }
             }
         }
         }
-        tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
+        tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
         rt = (rt + 1) % 32;
         rt = (rt + 1) % 32;
     }
     }
+    tcg_temp_free_i64(tcg_ebytes);
 
 
     if (is_postidx) {
     if (is_postidx) {
         if (rm == 31) {
         if (rm == 31) {
-            tcg_gen_mov_i64(tcg_rn, tcg_addr);
+            tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes);
         } else {
         } else {
             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
         }
         }
     }
     }
-    tcg_temp_free_i64(tcg_ebytes);
-    tcg_temp_free_i64(tcg_addr);
 }
 }
 
 
 /* Loads and stores */
 /* Loads and stores */
@@ -13753,6 +13821,90 @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
     }
     }
 }
 }
 
 
+/**
+ * is_guarded_page:
+ * @env: The cpu environment
+ * @s: The DisasContext
+ *
+ * Return true if the page is guarded.
+ */
+static bool is_guarded_page(CPUARMState *env, DisasContext *s)
+{
+#ifdef CONFIG_USER_ONLY
+    return false;  /* FIXME */
+#else
+    uint64_t addr = s->base.pc_first;
+    int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
+    unsigned int index = tlb_index(env, mmu_idx, addr);
+    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+
+    /*
+     * We test this immediately after reading an insn, which means
+     * that any normal page must be in the TLB.  The only exception
+     * would be for executing from flash or device memory, which
+     * does not retain the TLB entry.
+     *
+     * FIXME: Assume false for those, for now.  We could use
+     * arm_cpu_get_phys_page_attrs_debug to re-read the page
+     * table entry even for that case.
+     */
+    return (tlb_hit(entry->addr_code, addr) &&
+            env->iotlb[mmu_idx][index].attrs.target_tlb_bit0);
+#endif
+}
+
+/**
+ * btype_destination_ok:
+ * @insn: The instruction at the branch destination
+ * @bt: SCTLR_ELx.BT
+ * @btype: PSTATE.BTYPE; callers must only pass a non-zero value
+ *
+ * On a guarded page, only a limited number of insns
+ * may be present at the branch target:
+ *   - branch target identifiers (BTI),
+ *   - PACIASP and PACIBSP,
+ *   - BRK,
+ *   - HLT.
+ * Anything else causes a Branch Target Exception.
+ *
+ * Return true if the branch is compatible, false to raise BTITRAP.
+ */
+static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
+{
+    if ((insn & 0xfffff01fu) == 0xd503201fu) {
+        /* HINT space */
+        switch (extract32(insn, 5, 7)) {
+        case 0b011001: /* PACIASP */
+        case 0b011011: /* PACIBSP */
+            /*
+             * If SCTLR_ELx.BT, then PACI*SP are not compatible
+             * with btype == 3.  Otherwise all btype are ok.
+             */
+            return !bt || btype != 3;
+        case 0b100000: /* BTI */
+            /* Not compatible with any btype.  */
+            return false;
+        case 0b100010: /* BTI c */
+            /* Not compatible with btype == 3 */
+            return btype != 3;
+        case 0b100100: /* BTI j */
+            /* Not compatible with btype == 2 */
+            return btype != 2;
+        case 0b100110: /* BTI jc */
+            /* Compatible with any btype.  */
+            return true;
+        }
+    } else {
+        switch (insn & 0xffe0001fu) {
+        case 0xd4200000u: /* BRK */
+        case 0xd4400000u: /* HLT */
+            /* Give priority to the breakpoint exception.  */
+            return true;
+        }
+    }
+    return false;
+}
+
 /* C3.1 A64 instruction index by encoding */
 /* C3.1 A64 instruction index by encoding */
 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
 {
 {
@@ -13764,6 +13916,43 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
 
 
     s->fp_access_checked = false;
     s->fp_access_checked = false;
 
 
+    if (dc_isar_feature(aa64_bti, s)) {
+        if (s->base.num_insns == 1) {
+            /*
+             * At the first insn of the TB, compute s->guarded_page.
+             * We delayed computing this until successfully reading
+             * the first insn of the TB, above.  This (mostly) ensures
+             * that the softmmu tlb entry has been populated, and the
+             * page table GP bit is available.
+             *
+             * Note that we need to compute this even if btype == 0,
+             * because this value is used for BR instructions later
+             * where ENV is not available.
+             */
+            s->guarded_page = is_guarded_page(env, s);
+
+            /* First insn can have btype set to non-zero.  */
+            tcg_debug_assert(s->btype >= 0);
+
+            /*
+             * Note that the Branch Target Exception has fairly high
+             * priority -- below debugging exceptions but above most
+             * everything else.  This allows us to handle this now
+             * instead of waiting until the insn is otherwise decoded.
+             */
+            if (s->btype != 0
+                && s->guarded_page
+                && !btype_destination_ok(insn, s->bt, s->btype)) {
+                gen_exception_insn(s, 4, EXCP_UDEF, syn_btitrap(s->btype),
+                                   default_exception_el(s));
+                return;
+            }
+        } else {
+            /* Not the first insn: btype must be 0.  */
+            tcg_debug_assert(s->btype == 0);
+        }
+    }
+
     switch (extract32(insn, 25, 4)) {
     switch (extract32(insn, 25, 4)) {
     case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
     case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
         unallocated_encoding(s);
         unallocated_encoding(s);
@@ -13800,6 +13989,14 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
 
 
     /* if we allocated any temporaries, free them here */
     /* if we allocated any temporaries, free them here */
     free_tmp_a64(s);
     free_tmp_a64(s);
+
+    /*
+     * After execution of most insns, btype is reset to 0.
+     * Note that we set btype == -1 when the insn sets btype.
+     */
+    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
+        reset_btype(s);
+    }
 }
 }
 
 
 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
@@ -13829,6 +14026,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
     core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
     core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
     dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII);
     dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII);
+    dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID);
     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
 #if !defined(CONFIG_USER_ONLY)
 #if !defined(CONFIG_USER_ONLY)
     dc->user = (dc->current_el == 0);
     dc->user = (dc->current_el == 0);
@@ -13837,6 +14035,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
     dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL);
     dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL);
     dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16;
     dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16;
     dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE);
     dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE);
+    dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT);
+    dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE);
     dc->vec_len = 0;
     dc->vec_len = 0;
     dc->vec_stride = 0;
     dc->vec_stride = 0;
     dc->cp_regs = arm_cpu->cp_regs;
     dc->cp_regs = arm_cpu->cp_regs;

+ 11 - 1
target/arm/translate.h

@@ -26,7 +26,8 @@ typedef struct DisasContext {
     int user;
     int user;
 #endif
 #endif
     ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */
     ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */
-    uint8_t tbii;      /* TBI1|TBI0 for EL0/1 or TBI for EL2/3 */
+    uint8_t tbii;      /* TBI1|TBI0 for insns */
+    uint8_t tbid;      /* TBI1|TBI0 for data */
     bool ns;        /* Use non-secure CPREG bank on access */
     bool ns;        /* Use non-secure CPREG bank on access */
     int fp_excp_el; /* FP exception EL or 0 if enabled */
     int fp_excp_el; /* FP exception EL or 0 if enabled */
     int sve_excp_el; /* SVE exception EL or 0 if enabled */
     int sve_excp_el; /* SVE exception EL or 0 if enabled */
@@ -69,6 +70,15 @@ typedef struct DisasContext {
     bool ss_same_el;
     bool ss_same_el;
     /* True if v8.3-PAuth is active.  */
     /* True if v8.3-PAuth is active.  */
     bool pauth_active;
     bool pauth_active;
+    /* True with v8.5-BTI and SCTLR_ELx.BT* set.  */
+    bool bt;
+    /*
+     * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
+     *  < 0, set by the current instruction.
+     */
+    int8_t btype;
+    /* True if this page is guarded.  */
+    bool guarded_page;
     /* Bottom two bits of XScale c15_cpar coprocessor access control reg */
     /* Bottom two bits of XScale c15_cpar coprocessor access control reg */
     int c15_cpar;
     int c15_cpar;
     /* TCG op of the current insn_start.  */
     /* TCG op of the current insn_start.  */

+ 5 - 1
tests/tcg/aarch64/Makefile.target

@@ -8,10 +8,14 @@ VPATH 		+= $(AARCH64_SRC)
 # we don't build any of the ARM tests
 # we don't build any of the ARM tests
 AARCH64_TESTS=$(filter-out $(ARM_TESTS), $(TESTS))
 AARCH64_TESTS=$(filter-out $(ARM_TESTS), $(TESTS))
 AARCH64_TESTS+=fcvt
 AARCH64_TESTS+=fcvt
-TESTS:=$(AARCH64_TESTS)
 
 
 fcvt: LDFLAGS+=-lm
 fcvt: LDFLAGS+=-lm
 
 
 run-fcvt: fcvt
 run-fcvt: fcvt
 	$(call run-test,$<,$(QEMU) $<, "$< on $(TARGET_NAME)")
 	$(call run-test,$<,$(QEMU) $<, "$< on $(TARGET_NAME)")
 	$(call diff-out,$<,$(AARCH64_SRC)/fcvt.ref)
 	$(call diff-out,$<,$(AARCH64_SRC)/fcvt.ref)
+
+AARCH64_TESTS += pauth-1
+run-pauth-%: QEMU += -cpu max
+
+TESTS:=$(AARCH64_TESTS)

+ 23 - 0
tests/tcg/aarch64/pauth-1.c

@@ -0,0 +1,23 @@
+#include <assert.h>
+#include <sys/prctl.h>
+
+asm(".arch armv8.4-a");
+
+#ifndef PR_PAC_RESET_KEYS
+#define PR_PAC_RESET_KEYS  54
+#define PR_PAC_APDAKEY     (1 << 2)
+#endif
+
+int main()
+{
+    int x;
+    void *p0 = &x, *p1, *p2;
+
+    asm volatile("pacdza %0" : "=r"(p1) : "0"(p0));
+    prctl(PR_PAC_RESET_KEYS, PR_PAC_APDAKEY, 0, 0, 0);
+    asm volatile("pacdza %0" : "=r"(p2) : "0"(p0));
+
+    assert(p1 != p0);
+    assert(p1 != p2);
+    return 0;
+}