Răsfoiți Sursa

Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-cap-20201003' into staging

Update capstone submodule from v3.0.5 to v5 ("next").
Convert submodule build to meson.
Enable capstone disassembly for s390x.
Code cleanups in disas.c

# gpg: Signature made Sat 03 Oct 2020 10:33:44 BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-cap-20201003:
  disas/capstone: Add skipdata hook for s390x
  disas: Enable capstone disassembly for s390x
  disas: Split out capstone code to disas/capstone.c
  disas: Configure capstone for aarch64 host without libvixl
  disas: Cleanup plugin_disas
  disas: Use qemu/bswap.h for bfd endian loads
  disas: Clean up CPUDebug initialization
  disas: Move host asm annotations to tb_gen_code
  capstone: Require version 4.0 from a system library
  capstone: Update to upstream "next" branch
  capstone: Convert Makefile bits to meson bits

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 4 ani în urmă
părinte
comite
671ad7c446
14 a modificat fișierele cu 680 adăugiri și 699 ștergeri
  1. 4 14
      Makefile
  2. 15 9
      accel/tcg/translate-all.c
  3. 1 1
      capstone
  4. 9 59
      configure
  5. 157 538
      disas.c
  6. 326 0
      disas/capstone.c
  7. 1 0
      disas/meson.build
  8. 38 66
      include/disas/dis-asm.h
  9. 1 1
      include/disas/disas.h
  10. 2 2
      include/exec/log.h
  11. 116 7
      meson.build
  12. 4 0
      meson_options.txt
  13. 4 0
      target/s390x/cpu.c
  14. 2 2
      tcg/tcg.c

+ 4 - 14
Makefile

@@ -156,21 +156,11 @@ dtc/all: .git-submodule-status dtc/libfdt
 dtc/%: .git-submodule-status
 	@mkdir -p $@
 
-# Overriding CFLAGS causes us to lose defines added in the sub-makefile.
-# Not overriding CFLAGS leads to mis-matches between compilation modes.
-# Therefore we replicate some of the logic in the sub-makefile.
-# Remove all the extra -Warning flags that QEMU uses that Capstone doesn't;
-# no need to annoy QEMU developers with such things.
-CAP_CFLAGS = $(patsubst -W%,,$(CFLAGS) $(QEMU_CFLAGS)) $(CAPSTONE_CFLAGS)
-CAP_CFLAGS += -DCAPSTONE_USE_SYS_DYN_MEM
-CAP_CFLAGS += -DCAPSTONE_HAS_ARM
-CAP_CFLAGS += -DCAPSTONE_HAS_ARM64
-CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC
-CAP_CFLAGS += -DCAPSTONE_HAS_X86
-
+# Retain for a while so that incremental build across this patch
+# does not raise an error for missing target "capstone/all", which
+# comes from the saved SUBDIRS value.
 .PHONY: capstone/all
-capstone/all: .git-submodule-status
-	$(call quiet-command,$(MAKE) -C $(SRC_PATH)/capstone CAPSTONE_SHARED=no BUILDDIR="$(BUILD_DIR)/capstone" CC="$(CC)" AR="$(AR)" LD="$(LD)" RANLIB="$(RANLIB)" CFLAGS="$(CAP_CFLAGS)" $(SUBDIR_MAKEFLAGS) $(BUILD_DIR)/capstone/$(LIBCAPSTONE))
+capstone/all:
 
 .PHONY: slirp/all
 slirp/all: .git-submodule-status

+ 15 - 9
accel/tcg/translate-all.c

@@ -1816,10 +1816,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         qemu_log_in_addr_range(tb->pc)) {
         FILE *logfile = qemu_log_lock();
         int code_size, data_size = 0;
-        g_autoptr(GString) note = g_string_new("[tb header & initial instruction]");
-        size_t chunk_start = 0;
+        size_t chunk_start;
         int insn = 0;
-        qemu_log("OUT: [size=%d]\n", gen_code_size);
+
         if (tcg_ctx->data_gen_ptr) {
             code_size = tcg_ctx->data_gen_ptr - tb->tc.ptr;
             data_size = gen_code_size - code_size;
@@ -1828,26 +1827,33 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         }
 
         /* Dump header and the first instruction */
+        qemu_log("OUT: [size=%d]\n", gen_code_size);
+        qemu_log("  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
+                 tcg_ctx->gen_insn_data[insn][0]);
         chunk_start = tcg_ctx->gen_insn_end_off[insn];
-        log_disas(tb->tc.ptr, chunk_start, note->str);
+        log_disas(tb->tc.ptr, chunk_start);
 
         /*
          * Dump each instruction chunk, wrapping up empty chunks into
          * the next instruction. The whole array is offset so the
          * first entry is the beginning of the 2nd instruction.
          */
-        while (insn <= tb->icount && chunk_start < code_size) {
+        while (insn < tb->icount) {
             size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
             if (chunk_end > chunk_start) {
-                g_string_printf(note, "[guest addr: " TARGET_FMT_lx "]",
-                                tcg_ctx->gen_insn_data[insn][0]);
-                log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start,
-                          note->str);
+                qemu_log("  -- guest addr 0x" TARGET_FMT_lx "\n",
+                         tcg_ctx->gen_insn_data[insn][0]);
+                log_disas(tb->tc.ptr + chunk_start, chunk_end - chunk_start);
                 chunk_start = chunk_end;
             }
             insn++;
         }
 
+        if (chunk_start < code_size) {
+            qemu_log("  -- tb slow paths + alignment\n");
+            log_disas(tb->tc.ptr + chunk_start, code_size - chunk_start);
+        }
+
         /* Finally dump any data we may have after the block */
         if (data_size) {
             int i;

+ 1 - 1
capstone

@@ -1 +1 @@
-Subproject commit 22ead3e0bfdb87516656453336160e0a37b066bf
+Subproject commit f8b1b833015a4ae47110ed068e0deb7106ced66d

+ 9 - 59
configure

@@ -478,7 +478,7 @@ opengl=""
 opengl_dmabuf="no"
 cpuid_h="no"
 avx2_opt=""
-capstone=""
+capstone="auto"
 lzo=""
 snappy=""
 bzip2=""
@@ -1575,11 +1575,11 @@ for opt do
   ;;
   --enable-vhost-kernel) vhost_kernel="yes"
   ;;
-  --disable-capstone) capstone="no"
+  --disable-capstone) capstone="disabled"
   ;;
-  --enable-capstone) capstone="yes"
+  --enable-capstone) capstone="enabled"
   ;;
-  --enable-capstone=git) capstone="git"
+  --enable-capstone=git) capstone="internal"
   ;;
   --enable-capstone=system) capstone="system"
   ;;
@@ -5017,51 +5017,11 @@ fi
 # capstone
 
 case "$capstone" in
-  "" | yes)
-    if $pkg_config capstone; then
-      capstone=system
-    elif test -e "${source_path}/.git" && test $git_update = 'yes' ; then
-      capstone=git
-    elif test -e "${source_path}/capstone/Makefile" ; then
-      capstone=internal
-    elif test -z "$capstone" ; then
-      capstone=no
-    else
-      feature_not_found "capstone" "Install capstone devel or git submodule"
-    fi
-    ;;
-
-  system)
-    if ! $pkg_config capstone; then
-      feature_not_found "capstone" "Install capstone devel"
-    fi
-    ;;
-esac
-
-case "$capstone" in
-  git | internal)
-    if test "$capstone" = git; then
+  auto | enabled | internal)
+    # Simpler to always update submodule, even if not needed.
+    if test -e "${source_path}/.git" && test $git_update = 'yes' ; then
       git_submodules="${git_submodules} capstone"
     fi
-    mkdir -p capstone
-    if test "$mingw32" = "yes"; then
-      LIBCAPSTONE=capstone.lib
-    else
-      LIBCAPSTONE=libcapstone.a
-    fi
-    capstone_libs="-Lcapstone -lcapstone"
-    capstone_cflags="-I${source_path}/capstone/include"
-    ;;
-
-  system)
-    capstone_libs="$($pkg_config --libs capstone)"
-    capstone_cflags="$($pkg_config --cflags capstone)"
-    ;;
-
-  no)
-    ;;
-  *)
-    error_exit "Unknown state for capstone: $capstone"
     ;;
 esac
 
@@ -7142,11 +7102,6 @@ fi
 if test "$ivshmem" = "yes" ; then
   echo "CONFIG_IVSHMEM=y" >> $config_host_mak
 fi
-if test "$capstone" != "no" ; then
-  echo "CONFIG_CAPSTONE=y" >> $config_host_mak
-  echo "CAPSTONE_CFLAGS=$capstone_cflags" >> $config_host_mak
-  echo "CAPSTONE_LIBS=$capstone_libs" >> $config_host_mak
-fi
 if test "$debug_mutex" = "yes" ; then
   echo "CONFIG_DEBUG_MUTEX=y" >> $config_host_mak
 fi
@@ -7664,13 +7619,7 @@ done # for target in $targets
 if [ "$fdt" = "git" ]; then
   subdirs="$subdirs dtc"
 fi
-if [ "$capstone" = "git" -o "$capstone" = "internal" ]; then
-  subdirs="$subdirs capstone"
-fi
 echo "SUBDIRS=$subdirs" >> $config_host_mak
-if test -n "$LIBCAPSTONE"; then
-  echo "LIBCAPSTONE=$LIBCAPSTONE" >> $config_host_mak
-fi
 
 if test "$numa" = "yes"; then
   echo "CONFIG_NUMA=y" >> $config_host_mak
@@ -7846,7 +7795,8 @@ NINJA=${ninja:-$PWD/ninjatool} $meson setup \
 	-Dmalloc=$malloc -Dmalloc_trim=$malloc_trim \
 	-Dcocoa=$cocoa -Dmpath=$mpath -Dsdl=$sdl -Dsdl_image=$sdl_image \
 	-Dvnc=$vnc -Dvnc_sasl=$vnc_sasl -Dvnc_jpeg=$vnc_jpeg -Dvnc_png=$vnc_png \
-	-Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f\
+	-Dgettext=$gettext -Dxkbcommon=$xkbcommon -Du2f=$u2f \
+	-Dcapstone=$capstone \
         $cross_arg \
         "$PWD" "$source_path"
 

+ 157 - 538
disas.c

@@ -16,133 +16,68 @@ typedef struct CPUDebug {
 /* Filled in by elfload.c.  Simplistic, but will do for now. */
 struct syminfo *syminfos = NULL;
 
-/* Get LENGTH bytes from info's buffer, at target address memaddr.
-   Transfer them to myaddr.  */
-int
-buffer_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
-                   struct disassemble_info *info)
+/*
+ * Get LENGTH bytes from info's buffer, at host address memaddr.
+ * Transfer them to myaddr.
+ */
+static int host_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
+                            struct disassemble_info *info)
 {
     if (memaddr < info->buffer_vma
-        || memaddr + length > info->buffer_vma + info->buffer_length)
+        || memaddr + length > info->buffer_vma + info->buffer_length) {
         /* Out of bounds.  Use EIO because GDB uses it.  */
         return EIO;
+    }
     memcpy (myaddr, info->buffer + (memaddr - info->buffer_vma), length);
     return 0;
 }
 
-/* Get LENGTH bytes from info's buffer, at target address memaddr.
-   Transfer them to myaddr.  */
-static int
-target_read_memory (bfd_vma memaddr,
-                    bfd_byte *myaddr,
-                    int length,
-                    struct disassemble_info *info)
+/*
+ * Get LENGTH bytes from info's buffer, at target address memaddr.
+ * Transfer them to myaddr.
+ */
+static int target_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
+                              struct disassemble_info *info)
 {
     CPUDebug *s = container_of(info, CPUDebug, info);
-    int r;
-
-    r = cpu_memory_rw_debug(s->cpu, memaddr, myaddr, length, 0);
-
+    int r = cpu_memory_rw_debug(s->cpu, memaddr, myaddr, length, 0);
     return r ? EIO : 0;
 }
 
-/* Print an error message.  We can assume that this is in response to
-   an error return from buffer_read_memory.  */
-void
-perror_memory (int status, bfd_vma memaddr, struct disassemble_info *info)
+/*
+ * Print an error message.  We can assume that this is in response to
+ * an error return from {host,target}_read_memory.
+ */
+static void perror_memory(int status, bfd_vma memaddr,
+                          struct disassemble_info *info)
 {
-  if (status != EIO)
-    /* Can't happen.  */
-    (*info->fprintf_func) (info->stream, "Unknown error %d\n", status);
-  else
-    /* Actually, address between memaddr and memaddr + len was
-       out of bounds.  */
-    (*info->fprintf_func) (info->stream,
-			   "Address 0x%" PRIx64 " is out of bounds.\n", memaddr);
+    if (status != EIO) {
+        /* Can't happen.  */
+        info->fprintf_func(info->stream, "Unknown error %d\n", status);
+    } else {
+        /* Address between memaddr and memaddr + len was out of bounds.  */
+        info->fprintf_func(info->stream,
+                           "Address 0x%" PRIx64 " is out of bounds.\n",
+                           memaddr);
+    }
 }
 
-/* This could be in a separate file, to save minuscule amounts of space
-   in statically linked executables.  */
-
-/* Just print the address is hex.  This is included for completeness even
-   though both GDB and objdump provide their own (to print symbolic
-   addresses).  */
-
-void
-generic_print_address (bfd_vma addr, struct disassemble_info *info)
+/* Print address in hex. */
+static void print_address(bfd_vma addr, struct disassemble_info *info)
 {
-    (*info->fprintf_func) (info->stream, "0x%" PRIx64, addr);
+    info->fprintf_func(info->stream, "0x%" PRIx64, addr);
 }
 
 /* Print address in hex, truncated to the width of a host virtual address. */
-static void
-generic_print_host_address(bfd_vma addr, struct disassemble_info *info)
+static void host_print_address(bfd_vma addr, struct disassemble_info *info)
 {
-    uint64_t mask = ~0ULL >> (64 - (sizeof(void *) * 8));
-    generic_print_address(addr & mask, info);
+    print_address((uintptr_t)addr, info);
 }
 
-/* Just return the given address.  */
-
-int
-generic_symbol_at_address (bfd_vma addr, struct disassemble_info *info)
+/* Stub prevents some fruitless searching in optabs disassemblers. */
+static int symbol_at_address(bfd_vma addr, struct disassemble_info *info)
 {
-  return 1;
-}
-
-bfd_vma bfd_getl64 (const bfd_byte *addr)
-{
-  unsigned long long v;
-
-  v = (unsigned long long) addr[0];
-  v |= (unsigned long long) addr[1] << 8;
-  v |= (unsigned long long) addr[2] << 16;
-  v |= (unsigned long long) addr[3] << 24;
-  v |= (unsigned long long) addr[4] << 32;
-  v |= (unsigned long long) addr[5] << 40;
-  v |= (unsigned long long) addr[6] << 48;
-  v |= (unsigned long long) addr[7] << 56;
-  return (bfd_vma) v;
-}
-
-bfd_vma bfd_getl32 (const bfd_byte *addr)
-{
-  unsigned long v;
-
-  v = (unsigned long) addr[0];
-  v |= (unsigned long) addr[1] << 8;
-  v |= (unsigned long) addr[2] << 16;
-  v |= (unsigned long) addr[3] << 24;
-  return (bfd_vma) v;
-}
-
-bfd_vma bfd_getb32 (const bfd_byte *addr)
-{
-  unsigned long v;
-
-  v = (unsigned long) addr[0] << 24;
-  v |= (unsigned long) addr[1] << 16;
-  v |= (unsigned long) addr[2] << 8;
-  v |= (unsigned long) addr[3];
-  return (bfd_vma) v;
-}
-
-bfd_vma bfd_getl16 (const bfd_byte *addr)
-{
-  unsigned long v;
-
-  v = (unsigned long) addr[0];
-  v |= (unsigned long) addr[1] << 8;
-  return (bfd_vma) v;
-}
-
-bfd_vma bfd_getb16 (const bfd_byte *addr)
-{
-  unsigned long v;
-
-  v = (unsigned long) addr[0] << 24;
-  v |= (unsigned long) addr[1] << 16;
-  return (bfd_vma) v;
+    return 1;
 }
 
 static int print_insn_objdump(bfd_vma pc, disassemble_info *info,
@@ -174,292 +109,121 @@ static int print_insn_od_target(bfd_vma pc, disassemble_info *info)
     return print_insn_objdump(pc, info, "OBJD-T");
 }
 
-#ifdef CONFIG_CAPSTONE
-/* Temporary storage for the capstone library.  This will be alloced via
-   malloc with a size private to the library; thus there's no reason not
-   to share this across calls and across host vs target disassembly.  */
-static __thread cs_insn *cap_insn;
-
-/* Initialize the Capstone library.  */
-/* ??? It would be nice to cache this.  We would need one handle for the
-   host and one for the target.  For most targets we can reset specific
-   parameters via cs_option(CS_OPT_MODE, new_mode), but we cannot change
-   CS_ARCH_* in this way.  Thus we would need to be able to close and
-   re-open the target handle with a different arch for the target in order
-   to handle AArch64 vs AArch32 mode switching.  */
-static cs_err cap_disas_start(disassemble_info *info, csh *handle)
-{
-    cs_mode cap_mode = info->cap_mode;
-    cs_err err;
-
-    cap_mode += (info->endian == BFD_ENDIAN_BIG ? CS_MODE_BIG_ENDIAN
-                 : CS_MODE_LITTLE_ENDIAN);
-
-    err = cs_open(info->cap_arch, cap_mode, handle);
-    if (err != CS_ERR_OK) {
-        return err;
-    }
-
-    /* ??? There probably ought to be a better place to put this.  */
-    if (info->cap_arch == CS_ARCH_X86) {
-        /* We don't care about errors (if for some reason the library
-           is compiled without AT&T syntax); the user will just have
-           to deal with the Intel syntax.  */
-        cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
-    }
-
-    /* "Disassemble" unknown insns as ".byte W,X,Y,Z".  */
-    cs_option(*handle, CS_OPT_SKIPDATA, CS_OPT_ON);
-
-    /* Allocate temp space for cs_disasm_iter.  */
-    if (cap_insn == NULL) {
-        cap_insn = cs_malloc(*handle);
-        if (cap_insn == NULL) {
-            cs_close(handle);
-            return CS_ERR_MEM;
-        }
-    }
-    return CS_ERR_OK;
-}
-
-static void cap_dump_insn_units(disassemble_info *info, cs_insn *insn,
-                                int i, int n)
-{
-    fprintf_function print = info->fprintf_func;
-    FILE *stream = info->stream;
-
-    switch (info->cap_insn_unit) {
-    case 4:
-        if (info->endian == BFD_ENDIAN_BIG) {
-            for (; i < n; i += 4) {
-                print(stream, " %08x", ldl_be_p(insn->bytes + i));
-
-            }
-        } else {
-            for (; i < n; i += 4) {
-                print(stream, " %08x", ldl_le_p(insn->bytes + i));
-            }
-        }
-        break;
-
-    case 2:
-        if (info->endian == BFD_ENDIAN_BIG) {
-            for (; i < n; i += 2) {
-                print(stream, " %04x", lduw_be_p(insn->bytes + i));
-            }
-        } else {
-            for (; i < n; i += 2) {
-                print(stream, " %04x", lduw_le_p(insn->bytes + i));
-            }
-        }
-        break;
-
-    default:
-        for (; i < n; i++) {
-            print(stream, " %02x", insn->bytes[i]);
-        }
-        break;
-    }
-}
-
-static void cap_dump_insn(disassemble_info *info, cs_insn *insn,
-                          const char *note)
+static void initialize_debug(CPUDebug *s)
 {
-    fprintf_function print = info->fprintf_func;
-    int i, n, split;
-
-    print(info->stream, "0x%08" PRIx64 ": ", insn->address);
-
-    n = insn->size;
-    split = info->cap_insn_split;
-
-    /* Dump the first SPLIT bytes of the instruction.  */
-    cap_dump_insn_units(info, insn, 0, MIN(n, split));
-
-    /* Add padding up to SPLIT so that mnemonics line up.  */
-    if (n < split) {
-        int width = (split - n) / info->cap_insn_unit;
-        width *= (2 * info->cap_insn_unit + 1);
-        print(info->stream, "%*s", width, "");
-    }
-
-    /* Print the actual instruction.  */
-    print(info->stream, "  %-8s %s", insn->mnemonic, insn->op_str);
-    if (note) {
-        print(info->stream, "\t\t%s", note);
-    }
-    print(info->stream, "\n");
-
-    /* Dump any remaining part of the insn on subsequent lines.  */
-    for (i = split; i < n; i += split) {
-        print(info->stream, "0x%08" PRIx64 ": ", insn->address + i);
-        cap_dump_insn_units(info, insn, i, MIN(n, i + split));
-        print(info->stream, "\n");
-    }
+    memset(s, 0, sizeof(*s));
+    s->info.arch = bfd_arch_unknown;
+    s->info.cap_arch = -1;
+    s->info.cap_insn_unit = 4;
+    s->info.cap_insn_split = 4;
+    s->info.memory_error_func = perror_memory;
+    s->info.symbol_at_address_func = symbol_at_address;
 }
 
-/* Disassemble SIZE bytes at PC for the target.  */
-static bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size)
+static void initialize_debug_target(CPUDebug *s, CPUState *cpu)
 {
-    uint8_t cap_buf[1024];
-    csh handle;
-    cs_insn *insn;
-    size_t csize = 0;
-
-    if (cap_disas_start(info, &handle) != CS_ERR_OK) {
-        return false;
-    }
-    insn = cap_insn;
-
-    while (1) {
-        size_t tsize = MIN(sizeof(cap_buf) - csize, size);
-        const uint8_t *cbuf = cap_buf;
-
-        target_read_memory(pc + csize, cap_buf + csize, tsize, info);
-        csize += tsize;
-        size -= tsize;
-
-        while (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
-            cap_dump_insn(info, insn, NULL);
-        }
-
-        /* If the target memory is not consumed, go back for more... */
-        if (size != 0) {
-            /* ... taking care to move any remaining fractional insn
-               to the beginning of the buffer.  */
-            if (csize != 0) {
-                memmove(cap_buf, cbuf, csize);
-            }
-            continue;
-        }
+    initialize_debug(s);
 
-        /* Since the target memory is consumed, we should not have
-           a remaining fractional insn.  */
-        if (csize != 0) {
-            (*info->fprintf_func)(info->stream,
-                "Disassembler disagrees with translator "
-                "over instruction decoding\n"
-                "Please report this to qemu-devel@nongnu.org\n");
-        }
-        break;
-    }
-
-    cs_close(&handle);
-    return true;
-}
-
-/* Disassemble SIZE bytes at CODE for the host.  */
-static bool cap_disas_host(disassemble_info *info, void *code, size_t size,
-                           const char *note)
-{
-    csh handle;
-    const uint8_t *cbuf;
-    cs_insn *insn;
-    uint64_t pc;
-
-    if (cap_disas_start(info, &handle) != CS_ERR_OK) {
-        return false;
-    }
-    insn = cap_insn;
-
-    cbuf = code;
-    pc = (uintptr_t)code;
+    s->cpu = cpu;
+    s->info.read_memory_func = target_read_memory;
+    s->info.print_address_func = print_address;
+#ifdef TARGET_WORDS_BIGENDIAN
+    s->info.endian = BFD_ENDIAN_BIG;
+#else
+    s->info.endian = BFD_ENDIAN_LITTLE;
+#endif
 
-    while (cs_disasm_iter(handle, &cbuf, &size, &pc, insn)) {
-        cap_dump_insn(info, insn, note);
-        note = NULL;
-    }
-    if (size != 0) {
-        (*info->fprintf_func)(info->stream,
-            "Disassembler disagrees with TCG over instruction encoding\n"
-            "Please report this to qemu-devel@nongnu.org\n");
+    CPUClass *cc = CPU_GET_CLASS(cpu);
+    if (cc->disas_set_info) {
+        cc->disas_set_info(cpu, &s->info);
     }
-
-    cs_close(&handle);
-    return true;
 }
 
-#if !defined(CONFIG_USER_ONLY)
-/* Disassemble COUNT insns at PC for the target.  */
-static bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
+static void initialize_debug_host(CPUDebug *s)
 {
-    uint8_t cap_buf[32];
-    csh handle;
-    cs_insn *insn;
-    size_t csize = 0;
+    initialize_debug(s);
 
-    if (cap_disas_start(info, &handle) != CS_ERR_OK) {
-        return false;
-    }
-    insn = cap_insn;
-
-    while (1) {
-        /* We want to read memory for one insn, but generically we do not
-           know how much memory that is.  We have a small buffer which is
-           known to be sufficient for all supported targets.  Try to not
-           read beyond the page, Just In Case.  For even more simplicity,
-           ignore the actual target page size and use a 1k boundary.  If
-           that turns out to be insufficient, we'll come back around the
-           loop and read more.  */
-        uint64_t epc = QEMU_ALIGN_UP(pc + csize + 1, 1024);
-        size_t tsize = MIN(sizeof(cap_buf) - csize, epc - pc);
-        const uint8_t *cbuf = cap_buf;
-
-        /* Make certain that we can make progress.  */
-        assert(tsize != 0);
-        info->read_memory_func(pc, cap_buf + csize, tsize, info);
-        csize += tsize;
-
-        if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
-            cap_dump_insn(info, insn, NULL);
-            if (--count <= 0) {
-                break;
-            }
-        }
-        memmove(cap_buf, cbuf, csize);
-    }
-
-    cs_close(&handle);
-    return true;
-}
-#endif /* !CONFIG_USER_ONLY */
+    s->info.read_memory_func = host_read_memory;
+    s->info.print_address_func = host_print_address;
+#ifdef HOST_WORDS_BIGENDIAN
+    s->info.endian = BFD_ENDIAN_BIG;
 #else
-# define cap_disas_target(i, p, s)  false
-# define cap_disas_host(i, p, s, n)  false
-# define cap_disas_monitor(i, p, c)  false
-# define cap_disas_plugin(i, p, c) false
-#endif /* CONFIG_CAPSTONE */
+    s->info.endian = BFD_ENDIAN_LITTLE;
+#endif
+#if defined(CONFIG_TCG_INTERPRETER)
+    s->info.print_insn = print_insn_tci;
+#elif defined(__i386__)
+    s->info.mach = bfd_mach_i386_i386;
+    s->info.print_insn = print_insn_i386;
+    s->info.cap_arch = CS_ARCH_X86;
+    s->info.cap_mode = CS_MODE_32;
+    s->info.cap_insn_unit = 1;
+    s->info.cap_insn_split = 8;
+#elif defined(__x86_64__)
+    s->info.mach = bfd_mach_x86_64;
+    s->info.print_insn = print_insn_i386;
+    s->info.cap_arch = CS_ARCH_X86;
+    s->info.cap_mode = CS_MODE_64;
+    s->info.cap_insn_unit = 1;
+    s->info.cap_insn_split = 8;
+#elif defined(_ARCH_PPC)
+    s->info.disassembler_options = (char *)"any";
+    s->info.print_insn = print_insn_ppc;
+    s->info.cap_arch = CS_ARCH_PPC;
+# ifdef _ARCH_PPC64
+    s->info.cap_mode = CS_MODE_64;
+# endif
+#elif defined(__riscv) && defined(CONFIG_RISCV_DIS)
+#if defined(_ILP32) || (__riscv_xlen == 32)
+    s->info.print_insn = print_insn_riscv32;
+#elif defined(_LP64)
+    s->info.print_insn = print_insn_riscv64;
+#else
+#error unsupported RISC-V ABI
+#endif
+#elif defined(__aarch64__)
+    s->info.cap_arch = CS_ARCH_ARM64;
+# ifdef CONFIG_ARM_A64_DIS
+    s->info.print_insn = print_insn_arm_a64;
+# endif
+#elif defined(__alpha__)
+    s->info.print_insn = print_insn_alpha;
+#elif defined(__sparc__)
+    s->info.print_insn = print_insn_sparc;
+    s->info.mach = bfd_mach_sparc_v9b;
+#elif defined(__arm__)
+    /* TCG only generates code for arm mode.  */
+    s->info.print_insn = print_insn_arm;
+    s->info.cap_arch = CS_ARCH_ARM;
+#elif defined(__MIPSEB__)
+    s->info.print_insn = print_insn_big_mips;
+#elif defined(__MIPSEL__)
+    s->info.print_insn = print_insn_little_mips;
+#elif defined(__m68k__)
+    s->info.print_insn = print_insn_m68k;
+#elif defined(__s390__)
+    s->info.print_insn = print_insn_s390;
+    s->info.cap_arch = CS_ARCH_SYSZ;
+    s->info.cap_insn_unit = 2;
+    s->info.cap_insn_split = 6;
+#elif defined(__hppa__)
+    s->info.print_insn = print_insn_hppa;
+#endif
+}
 
 /* Disassemble this for me please... (debugging).  */
 void target_disas(FILE *out, CPUState *cpu, target_ulong code,
                   target_ulong size)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     target_ulong pc;
     int count;
     CPUDebug s;
 
-    INIT_DISASSEMBLE_INFO(s.info, out, fprintf);
-
-    s.cpu = cpu;
-    s.info.read_memory_func = target_read_memory;
+    initialize_debug_target(&s, cpu);
+    s.info.fprintf_func = fprintf;
+    s.info.stream = out;
     s.info.buffer_vma = code;
     s.info.buffer_length = size;
-    s.info.print_address_func = generic_print_address;
-    s.info.cap_arch = -1;
-    s.info.cap_mode = 0;
-    s.info.cap_insn_unit = 4;
-    s.info.cap_insn_split = 4;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    s.info.endian = BFD_ENDIAN_BIG;
-#else
-    s.info.endian = BFD_ENDIAN_LITTLE;
-#endif
-
-    if (cc->disas_set_info) {
-        cc->disas_set_info(cpu, &s.info);
-    }
 
     if (s.info.cap_arch >= 0 && cap_disas_target(&s.info, code, size)) {
         return;
@@ -485,13 +249,12 @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
     }
 }
 
-static __thread GString plugin_disas_output;
-
 static int plugin_printf(FILE *stream, const char *fmt, ...)
 {
-    va_list va;
-    GString *s = &plugin_disas_output;
+    /* We abuse the FILE parameter to pass a GString. */
+    GString *s = (GString *)stream;
     int initial_len = s->len;
+    va_list va;
 
     va_start(va, fmt);
     g_string_append_vprintf(s, fmt, va);
@@ -506,40 +269,6 @@ static void plugin_print_address(bfd_vma addr, struct disassemble_info *info)
 }
 
 
-#ifdef CONFIG_CAPSTONE
-/* Disassemble a single instruction directly into plugin output */
-static
-bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size)
-{
-    uint8_t cap_buf[1024];
-    csh handle;
-    cs_insn *insn;
-    size_t csize = 0;
-    int count;
-    GString *s = &plugin_disas_output;
-
-    if (cap_disas_start(info, &handle) != CS_ERR_OK) {
-        return false;
-    }
-    insn = cap_insn;
-
-    size_t tsize = MIN(sizeof(cap_buf) - csize, size);
-    const uint8_t *cbuf = cap_buf;
-    target_read_memory(pc, cap_buf, tsize, info);
-
-    count = cs_disasm(handle, cbuf, size, 0, 1, &insn);
-
-    if (count) {
-        g_string_printf(s, "%s %s", insn->mnemonic, insn->op_str);
-    } else {
-        g_string_printf(s, "cs_disasm failed");
-    }
-
-    cs_close(&handle);
-    return true;
-}
-#endif
-
 /*
  * We should only be disassembling one instruction at a time here. If
  * there is left over it usually indicates the front end has read more
@@ -547,146 +276,52 @@ bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size)
  */
 char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-    int count;
     CPUDebug s;
-    GString *ds = g_string_set_size(&plugin_disas_output, 0);
-
-    g_assert(ds == &plugin_disas_output);
+    GString *ds = g_string_new(NULL);
 
-    INIT_DISASSEMBLE_INFO(s.info, NULL, plugin_printf);
-
-    s.cpu = cpu;
-    s.info.read_memory_func = target_read_memory;
+    initialize_debug_target(&s, cpu);
+    s.info.fprintf_func = plugin_printf;
+    s.info.stream = (FILE *)ds;  /* abuse this slot */
     s.info.buffer_vma = addr;
     s.info.buffer_length = size;
     s.info.print_address_func = plugin_print_address;
-    s.info.cap_arch = -1;
-    s.info.cap_mode = 0;
-    s.info.cap_insn_unit = 4;
-    s.info.cap_insn_split = 4;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    s.info.endian = BFD_ENDIAN_BIG;
-#else
-    s.info.endian = BFD_ENDIAN_LITTLE;
-#endif
-
-    if (cc->disas_set_info) {
-        cc->disas_set_info(cpu, &s.info);
-    }
 
     if (s.info.cap_arch >= 0 && cap_disas_plugin(&s.info, addr, size)) {
-        return g_strdup(ds->str);
-    }
-
-    if (s.info.print_insn == NULL) {
-        s.info.print_insn = print_insn_od_target;
-    }
-
-    count = s.info.print_insn(addr, &s.info);
-
-    /* The decoder probably read more than it needed it's not critical */
-    if (count < size) {
-        warn_report("%s: %zu bytes left over", __func__, size - count);
+        ; /* done */
+    } else if (s.info.print_insn) {
+        s.info.print_insn(addr, &s.info);
+    } else {
+        ; /* cannot disassemble -- return empty string */
     }
 
-    return g_strdup(ds->str);
+    /* Return the buffer, freeing the GString container.  */
+    return g_string_free(ds, false);
 }
 
 /* Disassemble this for me please... (debugging). */
-void disas(FILE *out, void *code, unsigned long size, const char *note)
+void disas(FILE *out, void *code, unsigned long size)
 {
     uintptr_t pc;
     int count;
     CPUDebug s;
-    int (*print_insn)(bfd_vma pc, disassemble_info *info) = NULL;
-
-    INIT_DISASSEMBLE_INFO(s.info, out, fprintf);
-    s.info.print_address_func = generic_print_host_address;
 
+    initialize_debug_host(&s);
+    s.info.fprintf_func = fprintf;
+    s.info.stream = out;
     s.info.buffer = code;
     s.info.buffer_vma = (uintptr_t)code;
     s.info.buffer_length = size;
-    s.info.cap_arch = -1;
-    s.info.cap_mode = 0;
-    s.info.cap_insn_unit = 4;
-    s.info.cap_insn_split = 4;
-
-#ifdef HOST_WORDS_BIGENDIAN
-    s.info.endian = BFD_ENDIAN_BIG;
-#else
-    s.info.endian = BFD_ENDIAN_LITTLE;
-#endif
-#if defined(CONFIG_TCG_INTERPRETER)
-    print_insn = print_insn_tci;
-#elif defined(__i386__)
-    s.info.mach = bfd_mach_i386_i386;
-    print_insn = print_insn_i386;
-    s.info.cap_arch = CS_ARCH_X86;
-    s.info.cap_mode = CS_MODE_32;
-    s.info.cap_insn_unit = 1;
-    s.info.cap_insn_split = 8;
-#elif defined(__x86_64__)
-    s.info.mach = bfd_mach_x86_64;
-    print_insn = print_insn_i386;
-    s.info.cap_arch = CS_ARCH_X86;
-    s.info.cap_mode = CS_MODE_64;
-    s.info.cap_insn_unit = 1;
-    s.info.cap_insn_split = 8;
-#elif defined(_ARCH_PPC)
-    s.info.disassembler_options = (char *)"any";
-    print_insn = print_insn_ppc;
-    s.info.cap_arch = CS_ARCH_PPC;
-# ifdef _ARCH_PPC64
-    s.info.cap_mode = CS_MODE_64;
-# endif
-#elif defined(__riscv) && defined(CONFIG_RISCV_DIS)
-#if defined(_ILP32) || (__riscv_xlen == 32)
-    print_insn = print_insn_riscv32;
-#elif defined(_LP64)
-    print_insn = print_insn_riscv64;
-#else
-#error unsupported RISC-V ABI
-#endif
-#elif defined(__aarch64__) && defined(CONFIG_ARM_A64_DIS)
-    print_insn = print_insn_arm_a64;
-    s.info.cap_arch = CS_ARCH_ARM64;
-#elif defined(__alpha__)
-    print_insn = print_insn_alpha;
-#elif defined(__sparc__)
-    print_insn = print_insn_sparc;
-    s.info.mach = bfd_mach_sparc_v9b;
-#elif defined(__arm__)
-    print_insn = print_insn_arm;
-    s.info.cap_arch = CS_ARCH_ARM;
-    /* TCG only generates code for arm mode.  */
-#elif defined(__MIPSEB__)
-    print_insn = print_insn_big_mips;
-#elif defined(__MIPSEL__)
-    print_insn = print_insn_little_mips;
-#elif defined(__m68k__)
-    print_insn = print_insn_m68k;
-#elif defined(__s390__)
-    print_insn = print_insn_s390;
-#elif defined(__hppa__)
-    print_insn = print_insn_hppa;
-#endif
 
-    if (s.info.cap_arch >= 0 && cap_disas_host(&s.info, code, size, note)) {
+    if (s.info.cap_arch >= 0 && cap_disas_host(&s.info, code, size)) {
         return;
     }
 
-    if (print_insn == NULL) {
-        print_insn = print_insn_od_host;
+    if (s.info.print_insn == NULL) {
+        s.info.print_insn = print_insn_od_host;
     }
     for (pc = (uintptr_t)code; size > 0; pc += count, size -= count) {
         fprintf(out, "0x%08" PRIxPTR ":  ", pc);
-        count = print_insn(pc, &s.info);
-        if (note) {
-            fprintf(out, "\t\t%s", note);
-            note = NULL;
-        }
+        count = s.info.print_insn(pc, &s.info);
         fprintf(out, "\n");
         if (count < 0) {
             break;
@@ -731,31 +366,15 @@ physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
 void monitor_disas(Monitor *mon, CPUState *cpu,
                    target_ulong pc, int nb_insn, int is_physical)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     int count, i;
     CPUDebug s;
 
-    INIT_DISASSEMBLE_INFO(s.info, NULL, qemu_fprintf);
-
-    s.cpu = cpu;
-    s.info.read_memory_func
-        = (is_physical ? physical_read_memory : target_read_memory);
-    s.info.print_address_func = generic_print_address;
-    s.info.buffer_vma = pc;
-    s.info.cap_arch = -1;
-    s.info.cap_mode = 0;
-    s.info.cap_insn_unit = 4;
-    s.info.cap_insn_split = 4;
-
-#ifdef TARGET_WORDS_BIGENDIAN
-    s.info.endian = BFD_ENDIAN_BIG;
-#else
-    s.info.endian = BFD_ENDIAN_LITTLE;
-#endif
-
-    if (cc->disas_set_info) {
-        cc->disas_set_info(cpu, &s.info);
+    initialize_debug_target(&s, cpu);
+    s.info.fprintf_func = qemu_fprintf;
+    if (is_physical) {
+        s.info.read_memory_func = physical_read_memory;
     }
+    s.info.buffer_vma = pc;
 
     if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
         return;

+ 326 - 0
disas/capstone.c

@@ -0,0 +1,326 @@
+/*
+ * Interface to the capstone disassembler.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bswap.h"
+#include "disas/dis-asm.h"
+#include "disas/capstone.h"
+
+
+/*
+ * Temporary storage for the capstone library.  This will be alloced via
+ * malloc with a size private to the library; thus there's no reason not
+ * to share this across calls and across host vs target disassembly.
+ *
+ * The pointer is filled in on first use by cap_disas_start() (via
+ * cs_malloc) and is declared __thread, so each thread has its own copy.
+ * Nothing in this file frees it.
+ */
+static __thread cs_insn *cap_insn;
+
+/*
+ * Skipdata callback for S390X.
+ *
+ * Left to itself the capstone library always skips 2 bytes for S390X.
+ * That is less than ideal: the top two bits of the first byte tell us
+ * the size of the insn, so by skipping exactly that many bytes we stay
+ * in sync with the insn stream.
+ *
+ * Returns the number of bytes to skip for the insn at CODE + OFFSET.
+ * CODE_SIZE and USER_DATA are not used.
+ */
+static size_t CAPSTONE_API
+cap_skipdata_s390x_cb(const uint8_t *code, size_t code_size,
+                      size_t offset, void *user_data)
+{
+    /* See get_ilen() in target/s390x/internal.h.  */
+    const unsigned top2 = code[offset] >> 6;
+
+    if (top2 == 0) {
+        return 2;
+    }
+    if (top2 == 3) {
+        return 6;
+    }
+    /* Remaining encodings, 1 and 2.  */
+    return 4;
+}
+
+/*
+ * Skipdata configuration installed by cap_disas_start() for CS_ARCH_SYSZ:
+ * skipped bytes are labelled ".byte" and the amount to skip is chosen
+ * by cap_skipdata_s390x_cb().
+ */
+static const cs_opt_skipdata cap_skipdata_s390x = {
+    .mnemonic = ".byte",
+    .callback = cap_skipdata_s390x_cb
+};
+
+/*
+ * Open a capstone handle configured from INFO.
+ *
+ * On success *HANDLE is open with skipdata enabled, the shared cap_insn
+ * scratch insn exists, and CS_ERR_OK is returned; the caller is then
+ * responsible for cs_close().  On failure the capstone error is returned
+ * and no handle is left open.
+ *
+ * ??? It would be nice to cache this.  We would need one handle for the
+ * host and one for the target.  For most targets we can reset specific
+ * parameters via cs_option(CS_OPT_MODE, new_mode), but we cannot change
+ * CS_ARCH_* in this way.  Thus we would need to be able to close and
+ * re-open the target handle with a different arch for the target in order
+ * to handle AArch64 vs AArch32 mode switching.
+ */
+static cs_err cap_disas_start(disassemble_info *info, csh *handle)
+{
+    cs_mode mode = info->cap_mode;
+    cs_err ret;
+
+    /* Anything that is not explicitly big-endian is opened little-endian. */
+    if (info->endian == BFD_ENDIAN_BIG) {
+        mode += CS_MODE_BIG_ENDIAN;
+    } else {
+        mode += CS_MODE_LITTLE_ENDIAN;
+    }
+
+    ret = cs_open(info->cap_arch, mode, handle);
+    if (ret != CS_ERR_OK) {
+        return ret;
+    }
+
+    /* "Disassemble" unknown insns as ".byte W,X,Y,Z".  */
+    cs_option(*handle, CS_OPT_SKIPDATA, CS_OPT_ON);
+
+    if (info->cap_arch == CS_ARCH_SYSZ) {
+        /* Let our callback pick the skip length from the insn itself. */
+        cs_option(*handle, CS_OPT_SKIPDATA_SETUP,
+                  (uintptr_t)&cap_skipdata_s390x);
+    } else if (info->cap_arch == CS_ARCH_X86) {
+        /*
+         * We don't care about errors (if for some reason the library
+         * is compiled without AT&T syntax); the user will just have
+         * to deal with the Intel syntax.
+         */
+        cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
+    }
+
+    /* The temp space for cs_disasm_iter survives from earlier calls.  */
+    if (cap_insn != NULL) {
+        return CS_ERR_OK;
+    }
+
+    cap_insn = cs_malloc(*handle);
+    if (cap_insn == NULL) {
+        cs_close(handle);
+        return CS_ERR_MEM;
+    }
+    return CS_ERR_OK;
+}
+
+/*
+ * Print bytes [I, N) of INSN, grouped into units of info->cap_insn_unit
+ * bytes.  Units of 4 and 2 bytes are loaded according to info->endian
+ * (anything other than BFD_ENDIAN_BIG is loaded little-endian); any
+ * other unit size is dumped byte by byte.  Each unit is preceded by a
+ * single space.
+ */
+static void cap_dump_insn_units(disassemble_info *info, cs_insn *insn,
+                                int i, int n)
+{
+    fprintf_function print = info->fprintf_func;
+    FILE *stream = info->stream;
+    const bool big = (info->endian == BFD_ENDIAN_BIG);
+    const uint8_t *bytes = insn->bytes;
+
+    switch (info->cap_insn_unit) {
+    case 4:
+        for (; i < n; i += 4) {
+            const uint8_t *p = bytes + i;
+
+            print(stream, " %08x", big ? ldl_be_p(p) : ldl_le_p(p));
+        }
+        break;
+
+    case 2:
+        for (; i < n; i += 2) {
+            const uint8_t *p = bytes + i;
+
+            print(stream, " %04x", big ? lduw_be_p(p) : lduw_le_p(p));
+        }
+        break;
+
+    default:
+        for (; i < n; i++) {
+            print(stream, " %02x", bytes[i]);
+        }
+        break;
+    }
+}
+
+/*
+ * Print one decoded INSN: its address, up to info->cap_insn_split raw
+ * bytes, then the mnemonic and operands.  Insns longer than the split
+ * have their remaining bytes dumped on continuation lines, each
+ * prefixed with its own address.
+ */
+static void cap_dump_insn(disassemble_info *info, cs_insn *insn)
+{
+    fprintf_function print = info->fprintf_func;
+    FILE *stream = info->stream;
+    const int len = insn->size;
+    const int split = info->cap_insn_split;
+    int off;
+
+    print(stream, "0x%08" PRIx64 ": ", insn->address);
+
+    /* Dump the first SPLIT bytes of the instruction.  */
+    cap_dump_insn_units(info, insn, 0, MIN(len, split));
+
+    /*
+     * A short insn is padded by the width its missing units would have
+     * taken (two hex digits per byte plus the leading space), so that
+     * mnemonics line up.
+     */
+    if (len < split) {
+        int pad = (split - len) / info->cap_insn_unit;
+
+        pad *= (2 * info->cap_insn_unit + 1);
+        print(stream, "%*s", pad, "");
+    }
+
+    /* Print the actual instruction.  */
+    print(stream, "  %-8s %s\n", insn->mnemonic, insn->op_str);
+
+    /* Dump any remaining part of the insn on subsequent lines.  */
+    for (off = split; off < len; off += split) {
+        print(stream, "0x%08" PRIx64 ": ", insn->address + off);
+        cap_dump_insn_units(info, insn, off, MIN(len, off + split));
+        print(stream, "\n");
+    }
+}
+
+/*
+ * Disassemble SIZE bytes at PC for the target.
+ *
+ * Target memory is fetched through info->read_memory_func in chunks of
+ * at most sizeof(cap_buf) bytes and every decoded insn is printed with
+ * cap_dump_insn().
+ *
+ * Returns false if the capstone handle could not be set up, true
+ * otherwise (including when a decoding disagreement was reported).
+ */
+bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size)
+{
+    uint8_t cap_buf[1024];
+    csh handle;
+    cs_insn *insn;
+    /* Number of bytes in cap_buf that have been read but not yet decoded. */
+    size_t csize = 0;
+
+    if (cap_disas_start(info, &handle) != CS_ERR_OK) {
+        return false;
+    }
+    insn = cap_insn;
+
+    while (1) {
+        /* Top up the buffer, but never past what is left of the request. */
+        size_t tsize = MIN(sizeof(cap_buf) - csize, size);
+        const uint8_t *cbuf = cap_buf;
+
+        /*
+         * Bytes [0, csize) of cap_buf belong to address pc, so the new
+         * data is appended from pc + csize.  The return value of the
+         * read is not checked here.
+         */
+        info->read_memory_func(pc + csize, cap_buf + csize, tsize, info);
+        csize += tsize;
+        size -= tsize;
+
+        /* cbuf, csize and pc are updated by the library as insns decode. */
+        while (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
+            cap_dump_insn(info, insn);
+        }
+
+        /* If the target memory is not consumed, go back for more... */
+        if (size != 0) {
+            /*
+             * ... taking care to move any remaining fractional insn
+             * to the beginning of the buffer.
+             */
+            if (csize != 0) {
+                memmove(cap_buf, cbuf, csize);
+            }
+            continue;
+        }
+
+        /*
+         * Since the target memory is consumed, we should not have
+         * a remaining fractional insn.
+         */
+        if (csize != 0) {
+            info->fprintf_func(info->stream,
+                "Disassembler disagrees with translator "
+                "over instruction decoding\n"
+                "Please report this to qemu-devel@nongnu.org\n");
+        }
+        break;
+    }
+
+    cs_close(&handle);
+    return true;
+}
+
+/*
+ * Disassemble SIZE bytes at CODE for the host.
+ *
+ * The host code is decoded in place, with its own address used as the
+ * printed pc.  Returns false if the capstone handle could not be set
+ * up, true otherwise.
+ */
+bool cap_disas_host(disassemble_info *info, void *code, size_t size)
+{
+    const uint8_t *cursor = code;
+    uint64_t addr = (uintptr_t)code;
+    csh handle;
+
+    if (cap_disas_start(info, &handle) != CS_ERR_OK) {
+        return false;
+    }
+
+    /* CURSOR, SIZE and ADDR are all advanced by the library as we go.  */
+    while (cs_disasm_iter(handle, &cursor, &size, &addr, cap_insn)) {
+        cap_dump_insn(info, cap_insn);
+    }
+
+    /* Leftover bytes mean the last insn did not end where TCG said.  */
+    if (size != 0) {
+        info->fprintf_func(info->stream,
+            "Disassembler disagrees with TCG over instruction encoding\n"
+            "Please report this to qemu-devel@nongnu.org\n");
+    }
+
+    cs_close(&handle);
+    return true;
+}
+
+/*
+ * Disassemble COUNT insns at PC for the target.
+ *
+ * Memory is fetched through info->read_memory_func into a small buffer
+ * that is topped up before each insn; decoded insns are printed with
+ * cap_dump_insn().  If a read fails, a message is printed and
+ * disassembly stops early.
+ *
+ * Returns false if the capstone handle could not be set up (the caller
+ * then falls back to the non-capstone path), true otherwise.
+ */
+bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
+{
+    uint8_t cap_buf[32];
+    csh handle;
+    cs_insn *insn;
+    /* Number of bytes in cap_buf that have been read but not yet decoded. */
+    size_t csize = 0;
+
+    if (cap_disas_start(info, &handle) != CS_ERR_OK) {
+        return false;
+    }
+    insn = cap_insn;
+
+    while (1) {
+        /*
+         * We want to read memory for one insn, but generically we do not
+         * know how much memory that is.  We have a small buffer which is
+         * known to be sufficient for all supported targets.  Try to not
+         * read beyond the page, Just In Case.  For even more simplicity,
+         * ignore the actual target page size and use a 1k boundary.  If
+         * that turns out to be insufficient, we'll come back around the
+         * loop and read more.
+         */
+        uint64_t epc = QEMU_ALIGN_UP(pc + csize + 1, 1024);
+        size_t tsize = MIN(sizeof(cap_buf) - csize, epc - pc);
+        const uint8_t *cbuf = cap_buf;
+
+        /* Make certain that we can make progress.  */
+        assert(tsize != 0);
+
+        /*
+         * Bytes [0, csize) of cap_buf already hold the memory at
+         * [pc, pc + csize), carried over from the previous iteration,
+         * so the data appended after them must come from pc + csize.
+         * Reading from pc again would decode duplicated bytes.
+         *
+         * A non-zero return means the read failed; stop rather than
+         * disassemble whatever happens to be in the buffer.
+         */
+        if (info->read_memory_func(pc + csize, cap_buf + csize, tsize, info)) {
+            info->fprintf_func(info->stream,
+                "Couldn't read memory at pc=0x%" PRIx64 "\n", pc);
+            break;
+        }
+        csize += tsize;
+
+        if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
+            cap_dump_insn(info, insn);
+            if (--count <= 0) {
+                break;
+            }
+        }
+        /* Keep the undecoded tail at the front of the buffer.  */
+        memmove(cap_buf, cbuf, csize);
+    }
+
+    cs_close(&handle);
+    return true;
+}
+
+/*
+ * Disassemble a single instruction directly into plugin output.
+ *
+ * SIZE bytes at PC are fetched through info->read_memory_func; if they
+ * decode, "mnemonic operands" is printed with no address, raw bytes or
+ * newline.  Returns false if the capstone handle could not be set up,
+ * true otherwise, whether or not anything was printed.
+ */
+bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size)
+{
+    uint8_t cap_buf[32];
+    const uint8_t *cursor = cap_buf;
+    cs_insn *insn;
+    csh handle;
+
+    if (cap_disas_start(info, &handle) != CS_ERR_OK) {
+        return false;
+    }
+    insn = cap_insn;
+
+    /* The whole insn has to fit in the local buffer.  */
+    assert(size < sizeof(cap_buf));
+    info->read_memory_func(pc, cap_buf, size, info);
+
+    if (cs_disasm_iter(handle, &cursor, &size, &pc, insn)) {
+        info->fprintf_func(info->stream, "%s %s",
+                           insn->mnemonic, insn->op_str);
+    }
+
+    cs_close(&handle);
+    return true;
+}

+ 1 - 0
disas/meson.build

@@ -21,5 +21,6 @@ common_ss.add(when: 'CONFIG_S390_DIS', if_true: files('s390.c'))
 common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
 common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
 common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
+common_ss.add(when: capstone, if_true: files('capstone.c'))
 
 specific_ss.add(when: 'CONFIG_TCG_INTERPRETER', if_true: files('tci.c'))

+ 38 - 66
include/disas/dis-asm.h

@@ -406,7 +406,6 @@ typedef struct disassemble_info {
 
 } disassemble_info;
 
-
 /* Standard disassemblers.  Disassemble one instruction at the given
    target address.  Return number of bytes processed.  */
 typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *);
@@ -461,66 +460,17 @@ int print_insn_riscv32          (bfd_vma, disassemble_info*);
 int print_insn_riscv64          (bfd_vma, disassemble_info*);
 int print_insn_rx(bfd_vma, disassemble_info *);
 
-#if 0
-/* Fetch the disassembler for a given BFD, if that support is available.  */
-disassembler_ftype disassembler(bfd *);
-#endif
-
-
-/* This block of definitions is for particular callers who read instructions
-   into a buffer before calling the instruction decoder.  */
-
-/* Here is a function which callers may wish to use for read_memory_func.
-   It gets bytes from a buffer.  */
-int buffer_read_memory(bfd_vma, bfd_byte *, int, struct disassemble_info *);
-
-/* This function goes with buffer_read_memory.
-   It prints a message using info->fprintf_func and info->stream.  */
-void perror_memory(int, bfd_vma, struct disassemble_info *);
-
-
-/* Just print the address in hex.  This is included for completeness even
-   though both GDB and objdump provide their own (to print symbolic
-   addresses).  */
-void generic_print_address(bfd_vma, struct disassemble_info *);
-
-/* Always true.  */
-int generic_symbol_at_address(bfd_vma, struct disassemble_info *);
-
-/* Macro to initialize a disassemble_info struct.  This should be called
-   by all applications creating such a struct.  */
-#define INIT_DISASSEMBLE_INFO(INFO, STREAM, FPRINTF_FUNC) \
-  (INFO).flavour = bfd_target_unknown_flavour, \
-  (INFO).arch = bfd_arch_unknown, \
-  (INFO).mach = 0, \
-  (INFO).endian = BFD_ENDIAN_UNKNOWN, \
-  INIT_DISASSEMBLE_INFO_NO_ARCH(INFO, STREAM, FPRINTF_FUNC)
-
-/* Call this macro to initialize only the internal variables for the
-   disassembler.  Architecture dependent things such as byte order, or machine
-   variant are not touched by this macro.  This makes things much easier for
-   GDB which must initialize these things separately.  */
-
-#define INIT_DISASSEMBLE_INFO_NO_ARCH(INFO, STREAM, FPRINTF_FUNC) \
-  (INFO).fprintf_func = (FPRINTF_FUNC), \
-  (INFO).stream = (STREAM), \
-  (INFO).symbols = NULL, \
-  (INFO).num_symbols = 0, \
-  (INFO).private_data = NULL, \
-  (INFO).buffer = NULL, \
-  (INFO).buffer_vma = 0, \
-  (INFO).buffer_length = 0, \
-  (INFO).read_memory_func = buffer_read_memory, \
-  (INFO).memory_error_func = perror_memory, \
-  (INFO).print_address_func = generic_print_address, \
-  (INFO).print_insn = NULL, \
-  (INFO).symbol_at_address_func = generic_symbol_at_address, \
-  (INFO).flags = 0, \
-  (INFO).bytes_per_line = 0, \
-  (INFO).bytes_per_chunk = 0, \
-  (INFO).display_endian = BFD_ENDIAN_UNKNOWN, \
-  (INFO).disassembler_options = NULL, \
-  (INFO).insn_info_valid = 0
+/*
+ * Capstone-backed disassembly entry points.  When CONFIG_CAPSTONE is not
+ * defined they are replaced by macros that expand to false without
+ * evaluating their arguments, so callers can test the result
+ * unconditionally.
+ */
+#ifdef CONFIG_CAPSTONE
+bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size);
+bool cap_disas_host(disassemble_info *info, void *code, size_t size);
+bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count);
+bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size);
+#else
+# define cap_disas_target(i, p, s)  false
+# define cap_disas_host(i, p, s)    false
+# define cap_disas_monitor(i, p, c) false
+# define cap_disas_plugin(i, p, c)  false
+#endif /* CONFIG_CAPSTONE */
 
 #ifndef ATTRIBUTE_UNUSED
 #define ATTRIBUTE_UNUSED __attribute__((unused))
@@ -528,11 +478,33 @@ int generic_symbol_at_address(bfd_vma, struct disassemble_info *);
 
 /* from libbfd */
 
-bfd_vma bfd_getl64 (const bfd_byte *addr);
-bfd_vma bfd_getl32 (const bfd_byte *addr);
-bfd_vma bfd_getb32 (const bfd_byte *addr);
-bfd_vma bfd_getl16 (const bfd_byte *addr);
-bfd_vma bfd_getb16 (const bfd_byte *addr);
+#include "qemu/bswap.h"
+
+/*
+ * libbfd-named loaders, each forwarding to the ld*_le_p / ld*_be_p
+ * helper of the matching width and widening the result to bfd_vma.
+ */
+static inline bfd_vma bfd_getl64(const bfd_byte *addr)
+{
+    return ldq_le_p(addr);
+}
+
+static inline bfd_vma bfd_getl32(const bfd_byte *addr)
+{
+    /*
+     * NOTE(review): presumably ldl_le_p() returns a signed int, and the
+     * cast keeps the value from being sign-extended into bfd_vma --
+     * confirm against qemu/bswap.h.
+     */
+    return (uint32_t)ldl_le_p(addr);
+}
+
+static inline bfd_vma bfd_getl16(const bfd_byte *addr)
+{
+    return lduw_le_p(addr);
+}
+
+static inline bfd_vma bfd_getb32(const bfd_byte *addr)
+{
+    /* NOTE(review): same cast as in bfd_getl32(); see the note there. */
+    return (uint32_t)ldl_be_p(addr);
+}
+
+static inline bfd_vma bfd_getb16(const bfd_byte *addr)
+{
+    return lduw_be_p(addr);
+}
+
 typedef bool bfd_boolean;
 
 #endif /* DISAS_DIS_ASM_H */

+ 1 - 1
include/disas/disas.h

@@ -7,7 +7,7 @@
 #include "cpu.h"
 
 /* Disassemble this for me please... (debugging). */
-void disas(FILE *out, void *code, unsigned long size, const char *note);
+void disas(FILE *out, void *code, unsigned long size);
 void target_disas(FILE *out, CPUState *cpu, target_ulong code,
                   target_ulong size);
 

+ 2 - 2
include/exec/log.h

@@ -56,13 +56,13 @@ static inline void log_target_disas(CPUState *cpu, target_ulong start,
     rcu_read_unlock();
 }
 
-static inline void log_disas(void *code, unsigned long size, const char *note)
+static inline void log_disas(void *code, unsigned long size)
 {
     QemuLogFile *logfile;
     rcu_read_lock();
     logfile = qatomic_rcu_read(&qemu_logfile);
     if (logfile) {
-        disas(logfile->fd, code, size, note);
+        disas(logfile->fd, code, size);
     }
     rcu_read_unlock();
 }

+ 116 - 7
meson.build

@@ -10,6 +10,7 @@ else
   keyval = import('unstable-keyval')
 endif
 ss = import('sourceset')
+fs = import('fs')
 
 sh = find_program('sh')
 cc = meson.get_compiler('c')
@@ -495,11 +496,6 @@ if 'CONFIG_USB_LIBUSB' in config_host
   libusb = declare_dependency(compile_args: config_host['LIBUSB_CFLAGS'].split(),
                               link_args: config_host['LIBUSB_LIBS'].split())
 endif
-capstone = not_found
-if 'CONFIG_CAPSTONE' in config_host
-  capstone = declare_dependency(compile_args: config_host['CAPSTONE_CFLAGS'].split(),
-                                link_args: config_host['CAPSTONE_LIBS'].split())
-endif
 libpmem = not_found
 if 'CONFIG_LIBPMEM' in config_host
   libpmem = declare_dependency(compile_args: config_host['LIBPMEM_CFLAGS'].split(),
@@ -581,7 +577,6 @@ foreach k, v: config_host
     config_host_data.set(k, v == 'y' ? 1 : v)
   endif
 endforeach
-genh += configure_file(output: 'config-host.h', configuration: config_host_data)
 
 minikconf = find_program('scripts/minikconf.py')
 config_all = {}
@@ -736,6 +731,119 @@ config_all += {
   'CONFIG_ALL': true,
 }
 
+# Submodules
+
+# Resolve the 'capstone' option down to 'system', 'internal' or 'disabled'.
+# 'internal' and 'disabled' are taken as given; the other choices probe
+# pkg-config for capstone >= 4.0 first.
+capstone = not_found
+capstone_opt = get_option('capstone')
+if capstone_opt in ['enabled', 'auto', 'system']
+  # The submodule counts as present when its Makefile exists.
+  have_internal = fs.exists(meson.current_source_dir() / 'capstone/Makefile')
+  # A system library is mandatory for 'system', and for 'enabled' when
+  # there is no submodule to fall back to.
+  capstone = dependency('capstone', version: '>=4.0',
+                        static: enable_static, method: 'pkg-config',
+                        required: capstone_opt == 'system' or
+                                  capstone_opt == 'enabled' and not have_internal)
+  if capstone.found()
+    capstone_opt = 'system'
+  elif have_internal
+    capstone_opt = 'internal'
+  else
+    capstone_opt = 'disabled'
+  endif
+endif
+# Build the capstone submodule as a static library.  Only the architecture
+# back ends whose CONFIG_*_DIS key is present in config_all_disas are
+# compiled in; the matching CAPSTONE_HAS_* defines go into capstone-defs.h.
+if capstone_opt == 'internal'
+  capstone_data = configuration_data()
+  capstone_data.set('CAPSTONE_USE_SYS_DYN_MEM', '1')
+
+  # Architecture-independent core.
+  capstone_files = files(
+    'capstone/cs.c',
+    'capstone/MCInst.c',
+    'capstone/MCInstrDesc.c',
+    'capstone/MCRegisterInfo.c',
+    'capstone/SStream.c',
+    'capstone/utils.c'
+  )
+
+  if 'CONFIG_ARM_DIS' in config_all_disas
+    capstone_data.set('CAPSTONE_HAS_ARM', '1')
+    capstone_files += files(
+      'capstone/arch/ARM/ARMDisassembler.c',
+      'capstone/arch/ARM/ARMInstPrinter.c',
+      'capstone/arch/ARM/ARMMapping.c',
+      'capstone/arch/ARM/ARMModule.c'
+    )
+  endif
+
+  # FIXME: This config entry currently depends on a c++ compiler.
+  # Which is needed for building libvixl, but not for capstone.
+  if 'CONFIG_ARM_A64_DIS' in config_all_disas
+    capstone_data.set('CAPSTONE_HAS_ARM64', '1')
+    capstone_files += files(
+      'capstone/arch/AArch64/AArch64BaseInfo.c',
+      'capstone/arch/AArch64/AArch64Disassembler.c',
+      'capstone/arch/AArch64/AArch64InstPrinter.c',
+      'capstone/arch/AArch64/AArch64Mapping.c',
+      'capstone/arch/AArch64/AArch64Module.c'
+    )
+  endif
+
+  if 'CONFIG_PPC_DIS' in config_all_disas
+    capstone_data.set('CAPSTONE_HAS_POWERPC', '1')
+    capstone_files += files(
+      'capstone/arch/PowerPC/PPCDisassembler.c',
+      'capstone/arch/PowerPC/PPCInstPrinter.c',
+      'capstone/arch/PowerPC/PPCMapping.c',
+      'capstone/arch/PowerPC/PPCModule.c'
+    )
+  endif
+
+  if 'CONFIG_S390_DIS' in config_all_disas
+    capstone_data.set('CAPSTONE_HAS_SYSZ', '1')
+    capstone_files += files(
+      'capstone/arch/SystemZ/SystemZDisassembler.c',
+      'capstone/arch/SystemZ/SystemZInstPrinter.c',
+      'capstone/arch/SystemZ/SystemZMapping.c',
+      'capstone/arch/SystemZ/SystemZModule.c',
+      'capstone/arch/SystemZ/SystemZMCTargetDesc.c'
+    )
+  endif
+
+  if 'CONFIG_I386_DIS' in config_all_disas
+    capstone_data.set('CAPSTONE_HAS_X86', 1)
+    capstone_files += files(
+      'capstone/arch/X86/X86Disassembler.c',
+      'capstone/arch/X86/X86DisassemblerDecoder.c',
+      'capstone/arch/X86/X86ATTInstPrinter.c',
+      'capstone/arch/X86/X86IntelInstPrinter.c',
+      'capstone/arch/X86/X86InstPrinterCommon.c',
+      'capstone/arch/X86/X86Mapping.c',
+      'capstone/arch/X86/X86Module.c'
+    )
+  endif
+
+  configure_file(output: 'capstone-defs.h', configuration: capstone_data)
+
+  capstone_cargs = [
+    # FIXME: There does not seem to be a way to completely replace the c_args
+    # that come from add_project_arguments() -- we can only add to them.
+    # So: disable all warnings with a big hammer.
+    '-Wno-error', '-w',
+
+    # Include all configuration defines via a header file, which will wind up
+    # as a dependency on the object file, and thus changes here will result
+    # in a rebuild.
+    '-include', 'capstone-defs.h'
+  ]
+
+  # The library itself is compiled against capstone/include, while users of
+  # the dependency get capstone/include/capstone on their include path.
+  libcapstone = static_library('capstone',
+                               sources: capstone_files,
+                               c_args: capstone_cargs,
+                               include_directories: 'capstone/include')
+  capstone = declare_dependency(link_with: libcapstone,
+                                include_directories: 'capstone/include/capstone')
+endif
+config_host_data.set('CONFIG_CAPSTONE', capstone.found())
+
+genh += configure_file(output: 'config-host.h', configuration: config_host_data)
+
 # Generators
 
 hxtool = find_program('scripts/hxtool')
@@ -1007,6 +1115,7 @@ common_ss.add(files('cpus-common.c'))
 
 subdir('softmmu')
 
+common_ss.add(capstone)
 specific_ss.add(files('disas.c', 'exec.c', 'gdbstub.c'), capstone, libpmem, libdaxctl)
 specific_ss.add(files('exec-vary.c'))
 specific_ss.add(when: 'CONFIG_TCG', if_true: files(
@@ -1566,7 +1675,7 @@ summary_info += {'vvfat support':     config_host.has_key('CONFIG_VVFAT')}
 summary_info += {'qed support':       config_host.has_key('CONFIG_QED')}
 summary_info += {'parallels support': config_host.has_key('CONFIG_PARALLELS')}
 summary_info += {'sheepdog support':  config_host.has_key('CONFIG_SHEEPDOG')}
-summary_info += {'capstone':          config_host.has_key('CONFIG_CAPSTONE')}
+summary_info += {'capstone':          capstone_opt == 'disabled' ? false : capstone_opt}
 summary_info += {'libpmem support':   config_host.has_key('CONFIG_LIBPMEM')}
 summary_info += {'libdaxctl support': config_host.has_key('CONFIG_LIBDAXCTL')}
 summary_info += {'libudev':           libudev.found()}

+ 4 - 0
meson_options.txt

@@ -31,3 +31,7 @@ option('vnc_sasl', type : 'feature', value : 'auto',
        description: 'SASL authentication for VNC server')
 option('xkbcommon', type : 'feature', value : 'auto',
        description: 'xkbcommon support')
+
+option('capstone', type: 'combo', value: 'auto',
+       choices: ['disabled', 'enabled', 'auto', 'system', 'internal'],
+       description: 'Whether and how to find the capstone library')

+ 4 - 0
target/s390x/cpu.c

@@ -44,6 +44,7 @@
 #include "sysemu/tcg.h"
 #endif
 #include "fpu/softfloat-helpers.h"
+#include "disas/capstone.h"
 
 #define CR0_RESET       0xE0UL
 #define CR14_RESET      0xC2000000UL;
@@ -182,6 +183,9 @@ static void s390_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
 {
     info->mach = bfd_mach_s390_64;
     info->print_insn = print_insn_s390;
+    info->cap_arch = CS_ARCH_SYSZ;
+    info->cap_insn_unit = 2;
+    info->cap_insn_split = 6;
 }
 
 static void s390_cpu_realizefn(DeviceState *dev, Error **errp)

+ 2 - 2
tcg/tcg.c

@@ -1101,7 +1101,7 @@ void tcg_prologue_init(TCGContext *s)
             size_t data_size = prologue_size - code_size;
             size_t i;
 
-            log_disas(buf0, code_size, NULL);
+            log_disas(buf0, code_size);
 
             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                 if (sizeof(tcg_target_ulong) == 8) {
@@ -1115,7 +1115,7 @@ void tcg_prologue_init(TCGContext *s)
                 }
             }
         } else {
-            log_disas(buf0, prologue_size, NULL);
+            log_disas(buf0, prologue_size);
         }
         qemu_log("\n");
         qemu_log_flush();