ソースを参照

qemu: fix crash on REV32 in TCTI

Implement vector immediate shifts in TCTI.
osy 1 ヶ月 前
コミット
eada611bcc
1 ファイル変更251 行追加0 行削除
  1. 251 0
      patches/qemu-10.0.2-utm.patch

+ 251 - 0
patches/qemu-10.0.2-utm.patch

@@ -0,0 +1,251 @@
+From 594fe2f680f571dfe1d69281b803bdcbac925f7a Mon Sep 17 00:00:00 2001
+From: osy <osy@turing.llc>
+Date: Fri, 18 Jul 2025 13:29:23 -0700
+Subject: [PATCH] tcg/tcti: implement vector immediate shifts
+
+This now seems to be required as a result of the introduction of
+gen_gvec_rev{16,32,64} in 38f9950c8e0315d7b26803018a3f73d5f42e6703.
+---
+ tcg/aarch64-tcti/tcg-target-has.h     |  2 +-
+ tcg/aarch64-tcti/tcg-target-opc.h.inc |  1 +
+ tcg/aarch64-tcti/tcg-target.c.inc     | 45 +++++++++++++++++--
+ tcg/aarch64-tcti/tcti-gadget-gen.py   | 63 +++++++++++++++++++++++++--
+ 4 files changed, 103 insertions(+), 8 deletions(-)
+
+diff --git a/tcg/aarch64-tcti/tcg-target-has.h b/tcg/aarch64-tcti/tcg-target-has.h
+index 8e39891c02..67b50fcdea 100644
+--- a/tcg/aarch64-tcti/tcg-target-has.h
++++ b/tcg/aarch64-tcti/tcg-target-has.h
+@@ -84,7 +84,7 @@
+ #define TCG_TARGET_HAS_roti_vec         0
+ #define TCG_TARGET_HAS_rots_vec         0
+ #define TCG_TARGET_HAS_rotv_vec         0
+-#define TCG_TARGET_HAS_shi_vec          0
++#define TCG_TARGET_HAS_shi_vec          1
+ #define TCG_TARGET_HAS_shs_vec          0
+ #define TCG_TARGET_HAS_shv_vec          1
+ #define TCG_TARGET_HAS_mul_vec          1
+diff --git a/tcg/aarch64-tcti/tcg-target-opc.h.inc b/tcg/aarch64-tcti/tcg-target-opc.h.inc
+index 88cf2bc53d..5382315c41 100644
+--- a/tcg/aarch64-tcti/tcg-target-opc.h.inc
++++ b/tcg/aarch64-tcti/tcg-target-opc.h.inc
+@@ -12,3 +12,4 @@
+  */
+ 
+ DEF(aa64_sshl_vec, 1, 2, 0, TCG_OPF_VECTOR)
++DEF(aa64_sli_vec, 1, 2, 1, TCG_OPF_VECTOR)
+diff --git a/tcg/aarch64-tcti/tcg-target.c.inc b/tcg/aarch64-tcti/tcg-target.c.inc
+index 7b1186cd01..8b78abe4bb 100644
+--- a/tcg/aarch64-tcti/tcg-target.c.inc
++++ b/tcg/aarch64-tcti/tcg-target.c.inc
+@@ -217,6 +217,8 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
+         return C_O1_I2(w, w, w);
+     case INDEX_op_bitsel_vec:
+         return C_O1_I3(w, w, w, w);
++    case INDEX_op_aa64_sli_vec:
++        return C_O1_I2(w, 0, w);
+ 
+     default:
+         return C_NotImplemented;
+@@ -490,6 +492,13 @@ static void tcg_out_ternary_gadget(TCGContext *s, const void *gadget_base[TCG_TA
+     tcg_out_gadget(s, gadget_base[reg0][reg1][reg2]);
+ }
+ 
++
+/* Write gadget pointer (two registers plus an immediate; reg2 indexes the immediate table). */
++static void tcg_out_ternary_immediate_gadget(TCGContext *s, const void *gadget_base[TCG_TARGET_GP_REGS][TCG_TARGET_GP_REGS][TCTI_GADGET_IMMEDIATE_ARRAY_LEN], unsigned reg0, unsigned reg1, unsigned reg2)
++{
++    tcg_out_gadget(s, gadget_base[reg0][reg1][reg2]);
++}
++
+ /***************************
+  *  TCG Scalar Operations  *
+  ***************************/
+@@ -1558,13 +1567,18 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+     tcg_out_sized_vector_gadget_no64(s, name, ternary, vece, a, b, c)
+ 
+ 
+-#define tcg_out_ternary_vector_gadget_with_scalar(s, name, is_scalar, vece, a, b, c) \
++#define tcg_out_sized_gadget_with_scalar(s, name, arity, is_scalar, vece, args...) \
+     if (is_scalar) { \
+-        tcg_out_ternary_gadget(s, gadget_ ## name ## _scalar, w0, w1, w2); \
++        tcg_out_ ## arity ## _gadget(s, gadget_ ## name ## _scalar, args); \
+     } else { \
+-        tcg_out_ternary_vector_gadget(s, name, vece, w0, w1, w2); \
++        tcg_out_sized_vector_gadget(s, name, arity, vece, args); \
+     }
+ 
++#define tcg_out_ternary_vector_gadget_with_scalar(s, name, is_scalar, vece, a, b, c) \
++    tcg_out_sized_gadget_with_scalar(s, name, ternary, is_scalar, vece, a, b, c)
++
++#define tcg_out_ternary_immediate_vector_gadget_with_scalar(s, name, is_scalar, vece, a, b, c) \
++    tcg_out_sized_gadget_with_scalar(s, name, ternary_immediate, is_scalar, vece, a, b, c)
+ 
+ /* Return true if v16 is a valid 16-bit shifted immediate.  */
+ static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
+@@ -1765,6 +1779,20 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned
+         break;
+     }
+ 
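+    /* Immediate shifts: the immediate is deliberately passed as a register (r2/r3) to inhibit a compiler warning; it indexes the gadget table, minus one for right shifts because those tables start at #1. */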
++    case INDEX_op_shli_vec:
++        tcg_out_ternary_immediate_vector_gadget_with_scalar(s, shl, is_scalar, vece, w0, w1, r2);
++        break;
++    case INDEX_op_shri_vec:
++        tcg_out_ternary_immediate_vector_gadget_with_scalar(s, ushr, is_scalar, vece, w0, w1, r2 - 1);
++        break;
++    case INDEX_op_sari_vec:
++        tcg_out_ternary_immediate_vector_gadget_with_scalar(s, sshr, is_scalar, vece, w0, w1, r2 - 1);
++        break;
++    case INDEX_op_aa64_sli_vec:
++        tcg_out_ternary_immediate_vector_gadget_with_scalar(s, sli, is_scalar, vece, w0, w2, r3);
++        break;
++
+     case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
+     case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
+     default:
+@@ -1787,6 +1815,9 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+     case INDEX_op_abs_vec:
+     case INDEX_op_not_vec:
+     case INDEX_op_cmp_vec:
++    case INDEX_op_shli_vec:
++    case INDEX_op_shri_vec:
++    case INDEX_op_sari_vec:
+     case INDEX_op_ssadd_vec:
+     case INDEX_op_sssub_vec:
+     case INDEX_op_usadd_vec:
+@@ -1827,6 +1858,14 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+     va_end(va);
+ 
+     switch (opc) {
++    case INDEX_op_rotli_vec:
++        t1 = tcg_temp_new_vec(type);
++        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
++        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
++                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
++        tcg_temp_free_vec(t1);
++        break;
++
+     case INDEX_op_shrv_vec:
+     case INDEX_op_sarv_vec:
+         /* Right shifts are negative left shifts for AArch64.  */
+diff --git a/tcg/aarch64-tcti/tcti-gadget-gen.py b/tcg/aarch64-tcti/tcti-gadget-gen.py
+index 275c4ba943..ebed824500 100755
+--- a/tcg/aarch64-tcti/tcti-gadget-gen.py
++++ b/tcg/aarch64-tcti/tcti-gadget-gen.py
+@@ -113,7 +113,7 @@ def simple(name, *lines, export=True):
+ 
+ 
+ 
+-def with_register_substitutions(name, substitutions, *lines, immediate_range=range(0)):
++def with_register_substitutions(name, substitutions, *lines, immediate_range=range(0), filter=lambda p: False):
+     """ Generates a collection of gadgtes with register substitutions. """
+ 
+     def _expand_op1_immediate(num):
+@@ -166,6 +166,10 @@ def substitutions_for_letter(letter, number, line):
+ 
+     #  For each permutation...
+     for permutation in permutations:
++        # Filter any invalid combination
++        if filter(permutation): 
++            continue
++
+         new_lines = lines
+ 
+         # Replace each placeholder element with its proper value...
+@@ -212,9 +216,9 @@ def with_dnm(name, *lines):
+     print("};", file=c_file)
+ 
+ 
+-def with_dn_immediate(name, *lines, immediate_range):
++def with_dn_immediate(name, *lines, immediate_range, filter=lambda m: False):
+     """ Generates a collection of gadgets with substitutions for Xd, Xn, and Xm, and equivalents. """
+-    with_register_substitutions(name, ["d", "n"], *lines, immediate_range=immediate_range)
++    with_register_substitutions(name, ["d", "n"], *lines, immediate_range=immediate_range, filter=lambda p: filter(p[-1]))
+ 
+     # Fetch the files we'll be using for output.
+     c_file, h_file = _get_output_files()
+@@ -236,7 +240,10 @@ def with_dn_immediate(name, *lines, immediate_range):
+ 
+             # M array
+             for i in immediate_range:
+-                print(f"gadget_{name}_arg{d}_arg{n}_arg{i}", end=", ", file=c_file)
++                if filter(i):
++                    print(f"(void *)0", end=", ", file=c_file)
++                else:
++                    print(f"gadget_{name}_arg{d}_arg{n}_arg{i}", end=", ", file=c_file)
+ 
+             print("},", file=c_file)
+         print("\t},", file=c_file)
+@@ -625,6 +632,24 @@ def do_size_replacement(line, size):
+             sized_lines = (scalar,)
+         with_dnm(f"{name}_scalar", *sized_lines)
+ 
++def vector_dn_immediate(name, *lines, scalar=None, immediate_range, omit_sizes=(), filter=lambda s, m: False):
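+    """ Creates a set of immediate-operand gadgets for every size of a given vector op. Accepts 'S' as a size placeholder; immediates for which filter(size, imm) is true are skipped (size is None for the scalar variant). """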
++
++    def do_size_replacement(line, size):
++        return line.replace(".S", f".{size}")
++        
++    # Create a variant for each size, replacing any placeholders.
++    for size in VECTOR_SIZES:
++        if size in omit_sizes:
++            continue
++
++        sized_lines = (do_size_replacement(line, size) for line in lines)
++        with_dn_immediate(f"{name}_{size}", *sized_lines, immediate_range=immediate_range, filter=lambda m: filter(size, m))
++
++    if scalar:
++        if isinstance(scalar, str):
++            sized_lines = (scalar,)
++        with_dn_immediate(f"{name}_scalar", *sized_lines, immediate_range=immediate_range, filter=lambda m: filter(None, m))
+ 
+ def vector_math_dnm(name, operation):
+     """ Generates a collection of gadgets for vector math instructions. """
+@@ -647,6 +672,9 @@ def vector_logic_dnm(name, operation):
+     with_dnm(f"{name}_d", f"{operation} Vd.8b, Vn.8b, Vm.8b")
+     with_dnm(f"{name}_q", f"{operation} Vd.16b, Vn.16b, Vm.16b")
+ 
++def vector_math_dn_immediate(name, operation, immediate_range, filter=lambda x: False):
+    """ Generates a collection of gadgets for vector math instructions that take an immediate operand. """
++    vector_dn_immediate(name, f"{operation} Vd.S, Vn.S, #Ii", scalar=f"{operation} Dd, Dn, #Ii", immediate_range=immediate_range, filter=filter)
+ 
+ #
+ # Gadget definitions.
+@@ -1088,6 +1116,33 @@ def vector_logic_dnm(name, operation):
+ vector_math_dnm("shlv", "ushl")
+ vector_math_dnm("sshl", "sshl")
+ 
++def filter_shl(size, imm):
++    match size:
++        case '16b': return imm >= 8
++        case '8b': return imm >= 8
++        case '4h': return imm >= 16
++        case '8h': return imm >= 16
++        case '2s': return imm >= 32
++        case '4s': return imm >= 32
++    return False
++
++def filter_shr(size, imm):
++    if imm == 0:
++        return True
++    match size:
++        case '16b': return imm > 8
++        case '8b': return imm > 8
++        case '4h': return imm > 16
++        case '8h': return imm > 16
++        case '2s': return imm > 32
++        case '4s': return imm > 32
++    return False
++
++vector_math_dn_immediate("shl", "shl", immediate_range=range(64), filter=filter_shl)
++vector_math_dn_immediate("ushr", "ushr", immediate_range=range(1,65), filter=filter_shr)
++vector_math_dn_immediate("sshr", "sshr", immediate_range=range(1,65), filter=filter_shr)
++vector_math_dn_immediate("sli", "sli", immediate_range=range(64), filter=filter_shl)
++
+ vector_dnm("cmeq", "cmeq Vd.S, Vn.S, Vm.S", scalar="cmeq Dd, Dn, Dm")
+ vector_dnm("cmgt", "cmgt Vd.S, Vn.S, Vm.S", scalar="cmgt Dd, Dn, Dm")
+ vector_dnm("cmge", "cmge Vd.S, Vn.S, Vm.S", scalar="cmge Dd, Dn, Dm")
+-- 
+2.41.0
+