|
@@ -0,0 +1,296 @@
|
|
|
/*
 * ARM generic helpers for various arithmetical operations.
 *
 * This code is licensed under the GNU GPL v2 or later.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
|
|
|
+#include "qemu/osdep.h"
|
|
|
+#include "cpu.h"
|
|
|
+#include "exec/helper-proto.h"
|
|
|
+#include "qemu/crc32c.h"
|
|
|
+#include <zlib.h> /* for crc32 */
|
|
|
+
|
|
|
/*
 * Note that signed overflow is undefined behaviour in C.  The following
 * routines are careful to use unsigned types where modulo arithmetic is
 * required.  Failure to do so _will_ break on newer gcc.
 */
|
|
|
+
|
|
|
/* Signed saturating arithmetic. */

/*
 * Perform 16-bit signed saturating addition.
 *
 * @a and @b carry two's-complement 16-bit values in unsigned containers, so
 * the wrapped sum is formed without relying on signed overflow.
 *
 * Returns the wrapped sum, replaced by 0x7fff when two non-negative operands
 * overflow and by 0x8000 when two negative operands overflow.
 */
static inline uint16_t add16_sat(uint16_t a, uint16_t b)
{
    const uint16_t sign = 0x8000;
    uint16_t res = (uint16_t)(a + b);

    /* Overflow iff both operands share a sign that the wrapped sum lacks. */
    if (((a ^ b) & sign) == 0 && ((res ^ a) & sign) != 0) {
        res = (a & sign) ? 0x8000 : 0x7fff;
    }
    return res;
}
|
|
|
+
|
|
|
/*
 * Perform 8-bit signed saturating addition.
 *
 * @a and @b carry two's-complement 8-bit values in unsigned containers.
 *
 * Returns the wrapped sum, replaced by 0x7f when two non-negative operands
 * overflow and by 0x80 when two negative operands overflow.
 */
static inline uint8_t add8_sat(uint8_t a, uint8_t b)
{
    const uint8_t sign = 0x80;
    uint8_t res = (uint8_t)(a + b);

    /* Overflow iff both operands share a sign that the wrapped sum lacks. */
    if (((a ^ b) & sign) == 0 && ((res ^ a) & sign) != 0) {
        res = (a & sign) ? 0x80 : 0x7f;
    }
    return res;
}
|
|
|
+
|
|
|
/*
 * Perform 16-bit signed saturating subtraction.
 *
 * @a and @b carry two's-complement 16-bit values in unsigned containers.
 *
 * Returns the wrapped difference a - b, replaced by 0x7fff when a
 * non-negative @a minus a negative @b overflows and by 0x8000 when a
 * negative @a minus a non-negative @b overflows.
 */
static inline uint16_t sub16_sat(uint16_t a, uint16_t b)
{
    const uint16_t sign = 0x8000;
    uint16_t res = (uint16_t)(a - b);

    /* Overflow iff the operand signs differ and the result left a's sign. */
    if (((a ^ b) & sign) != 0 && ((res ^ a) & sign) != 0) {
        res = (a & sign) ? 0x8000 : 0x7fff;
    }
    return res;
}
|
|
|
+
|
|
|
/*
 * Perform 8-bit signed saturating subtraction.
 *
 * @a and @b carry two's-complement 8-bit values in unsigned containers.
 *
 * Returns the wrapped difference a - b, replaced by 0x7f when a
 * non-negative @a minus a negative @b overflows and by 0x80 when a
 * negative @a minus a non-negative @b overflows.
 */
static inline uint8_t sub8_sat(uint8_t a, uint8_t b)
{
    const uint8_t sign = 0x80;
    uint8_t res = (uint8_t)(a - b);

    /* Overflow iff the operand signs differ and the result left a's sign. */
    if (((a ^ b) & sign) != 0 && ((res ^ a) & sign) != 0) {
        res = (a & sign) ? 0x80 : 0x7f;
    }
    return res;
}
|
|
|
+
|
|
|
+#define ADD16(a, b, n) RESULT(add16_sat(a, b), n, 16);
|
|
|
+#define SUB16(a, b, n) RESULT(sub16_sat(a, b), n, 16);
|
|
|
+#define ADD8(a, b, n) RESULT(add8_sat(a, b), n, 8);
|
|
|
+#define SUB8(a, b, n) RESULT(sub8_sat(a, b), n, 8);
|
|
|
+#define PFX q
|
|
|
+
|
|
|
+#include "op_addsub.c.inc"
|
|
|
+
|
|
|
/* Unsigned saturating arithmetic. */

/*
 * Perform 16-bit unsigned saturating addition.
 *
 * Returns a + b, or 0xffff when the sum does not fit in 16 bits.
 */
static inline uint16_t add16_usat(uint16_t a, uint16_t b)
{
    uint16_t res = (uint16_t)(a + b);

    /* A wrapped sum is always smaller than either operand. */
    return (res < a) ? 0xffff : res;
}
|
|
|
+
|
|
|
/*
 * Perform 16-bit unsigned saturating subtraction.
 *
 * Returns a - b, or 0 when @b is greater than or equal to @a.
 */
static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
{
    return (a > b) ? (uint16_t)(a - b) : 0;
}
|
|
|
+
|
|
|
/*
 * Perform 8-bit unsigned saturating addition.
 *
 * Returns a + b, or 0xff when the sum does not fit in 8 bits.
 */
static inline uint8_t add8_usat(uint8_t a, uint8_t b)
{
    uint8_t res = (uint8_t)(a + b);

    /* A wrapped sum is always smaller than either operand. */
    return (res < a) ? 0xff : res;
}
|
|
|
+
|
|
|
/*
 * Perform 8-bit unsigned saturating subtraction.
 *
 * Returns a - b, or 0 when @b is greater than or equal to @a.
 */
static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
{
    return (a > b) ? (uint8_t)(a - b) : 0;
}
|
|
|
+
|
|
|
+#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16);
|
|
|
+#define SUB16(a, b, n) RESULT(sub16_usat(a, b), n, 16);
|
|
|
+#define ADD8(a, b, n) RESULT(add8_usat(a, b), n, 8);
|
|
|
+#define SUB8(a, b, n) RESULT(sub8_usat(a, b), n, 8);
|
|
|
+#define PFX uq
|
|
|
+
|
|
|
+#include "op_addsub.c.inc"
|
|
|
+
|
|
|
/* Signed modulo arithmetic. */

/*
 * Apply the binary operator 'op' to the sign-extended 16-bit values of 'a'
 * and 'b' in 32-bit arithmetic (so the exact result is representable), hand
 * the result to RESULT() for lane 'n', and OR the two-bit group for lane 'n'
 * into 'ge' when the exact result is non-negative.
 *
 * RESULT() and the 'ge' variable are not defined here; the expansion site
 * must supply them.
 */
#define SARITH16(a, b, n, op) do { \
    int32_t sum; \
    sum = (int32_t)(int16_t)(a) op (int32_t)(int16_t)(b); \
    RESULT(sum, n, 16); \
    if (sum >= 0) { \
        ge |= 3 << ((n) * 2); \
    } \
    } while (0)
|
|
|
+
|
|
|
/*
 * Apply the binary operator 'op' to the sign-extended 8-bit values of 'a'
 * and 'b' in 32-bit arithmetic, hand the result to RESULT() for lane 'n',
 * and set bit 'n' of 'ge' when the exact result is non-negative.
 *
 * RESULT() and the 'ge' variable are not defined here; the expansion site
 * must supply them.
 */
#define SARITH8(a, b, n, op) do { \
    int32_t sum; \
    sum = (int32_t)(int8_t)(a) op (int32_t)(int8_t)(b); \
    RESULT(sum, n, 8); \
    if (sum >= 0) { \
        ge |= 1 << (n); \
    } \
    } while (0)
|
|
|
+
|
|
|
+
|
|
|
+#define ADD16(a, b, n) SARITH16(a, b, n, +)
|
|
|
+#define SUB16(a, b, n) SARITH16(a, b, n, -)
|
|
|
+#define ADD8(a, b, n) SARITH8(a, b, n, +)
|
|
|
+#define SUB8(a, b, n) SARITH8(a, b, n, -)
|
|
|
+#define PFX s
|
|
|
+#define ARITH_GE
|
|
|
+
|
|
|
+#include "op_addsub.c.inc"
|
|
|
+
|
|
|
/* Unsigned modulo arithmetic. */

/*
 * Add the zero-extended 16-bit values of 'a' and 'b' in 32-bit arithmetic,
 * hand the sum to RESULT() for lane 'n', and OR the two-bit group for lane
 * 'n' into 'ge' when the addition carried out of bit 15.
 *
 * RESULT() and the 'ge' variable are not defined here; the expansion site
 * must supply them.
 */
#define ADD16(a, b, n) do { \
    uint32_t sum; \
    sum = (uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b); \
    RESULT(sum, n, 16); \
    if ((sum >> 16) == 1) { \
        ge |= 3 << ((n) * 2); \
    } \
    } while (0)
|
|
|
+
|
|
|
/*
 * Add the zero-extended 8-bit values of 'a' and 'b' in 32-bit arithmetic,
 * hand the sum to RESULT() for lane 'n', and set bit 'n' of 'ge' when the
 * addition carried out of bit 7.
 *
 * RESULT() and the 'ge' variable are not defined here; the expansion site
 * must supply them.
 */
#define ADD8(a, b, n) do { \
    uint32_t sum; \
    sum = (uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b); \
    RESULT(sum, n, 8); \
    if ((sum >> 8) == 1) { \
        ge |= 1 << (n); \
    } \
    } while (0)
|
|
|
+
|
|
|
/*
 * Subtract the zero-extended 16-bit value of 'b' from that of 'a' in 32-bit
 * unsigned arithmetic, hand the difference to RESULT() for lane 'n', and OR
 * the two-bit group for lane 'n' into 'ge' when no borrow occurred (the
 * upper 16 bits of the difference are zero, i.e. a >= b).
 *
 * RESULT() and the 'ge' variable are not defined here; the expansion site
 * must supply them.
 */
#define SUB16(a, b, n) do { \
    uint32_t sum; \
    sum = (uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b); \
    RESULT(sum, n, 16); \
    if ((sum >> 16) == 0) { \
        ge |= 3 << ((n) * 2); \
    } \
    } while (0)
|
|
|
+
|
|
|
/*
 * Subtract the zero-extended 8-bit value of 'b' from that of 'a' in 32-bit
 * unsigned arithmetic, hand the difference to RESULT() for lane 'n', and
 * set bit 'n' of 'ge' when no borrow occurred (the bits above bit 7 of the
 * difference are zero, i.e. a >= b).
 *
 * RESULT() and the 'ge' variable are not defined here; the expansion site
 * must supply them.
 */
#define SUB8(a, b, n) do { \
    uint32_t sum; \
    sum = (uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b); \
    RESULT(sum, n, 8); \
    if ((sum >> 8) == 0) { \
        ge |= 1 << (n); \
    } \
    } while (0)
|
|
|
+
|
|
|
+#define PFX u
|
|
|
+#define ARITH_GE
|
|
|
+
|
|
|
+#include "op_addsub.c.inc"
|
|
|
+
|
|
|
/*
 * Halved signed arithmetic.
 *
 * Each macro sign-extends its operands to 32 bits, adds or subtracts them
 * exactly, shifts the result right by one and hands it to RESULT() for lane
 * 'n'.  The right shift is applied to a possibly negative int32_t, so the
 * halving relies on the compiler implementing that as an arithmetic shift.
 *
 * RESULT() is not defined here; the expansion site must supply it.
 */
#define ADD16(a, b, n) \
    RESULT(((int32_t)(int16_t)(a) + (int32_t)(int16_t)(b)) >> 1, n, 16)
#define SUB16(a, b, n) \
    RESULT(((int32_t)(int16_t)(a) - (int32_t)(int16_t)(b)) >> 1, n, 16)
#define ADD8(a, b, n) \
    RESULT(((int32_t)(int8_t)(a) + (int32_t)(int8_t)(b)) >> 1, n, 8)
#define SUB8(a, b, n) \
    RESULT(((int32_t)(int8_t)(a) - (int32_t)(int8_t)(b)) >> 1, n, 8)
|
|
|
+#define PFX sh
|
|
|
+
|
|
|
+#include "op_addsub.c.inc"
|
|
|
+
|
|
|
/*
 * Halved unsigned arithmetic.
 *
 * Each macro zero-extends its operands to 32 bits, adds or subtracts them
 * in unsigned arithmetic, shifts the result right by one and hands it to
 * RESULT() for lane 'n'.  For subtraction the 32-bit difference may wrap;
 * the bits shifted down from above the lane width then carry the borrow.
 *
 * RESULT() is not defined here; the expansion site must supply it.
 */
#define ADD16(a, b, n) \
    RESULT(((uint32_t)(uint16_t)(a) + (uint32_t)(uint16_t)(b)) >> 1, n, 16)
#define SUB16(a, b, n) \
    RESULT(((uint32_t)(uint16_t)(a) - (uint32_t)(uint16_t)(b)) >> 1, n, 16)
#define ADD8(a, b, n) \
    RESULT(((uint32_t)(uint8_t)(a) + (uint32_t)(uint8_t)(b)) >> 1, n, 8)
#define SUB8(a, b, n) \
    RESULT(((uint32_t)(uint8_t)(a) - (uint32_t)(uint8_t)(b)) >> 1, n, 8)
|
|
|
+#define PFX uh
|
|
|
+
|
|
|
+#include "op_addsub.c.inc"
|
|
|
+
|
|
|
/*
 * Absolute difference of two unsigned bytes.
 *
 * Returns |a - b|, which always fits in 8 bits.
 */
static inline uint8_t do_usad(uint8_t a, uint8_t b)
{
    return (a > b) ? (uint8_t)(a - b) : (uint8_t)(b - a);
}
|
|
|
+
|
|
|
+/* Unsigned sum of absolute byte differences. */
|
|
|
+uint32_t HELPER(usad8)(uint32_t a, uint32_t b)
|
|
|
+{
|
|
|
+ uint32_t sum;
|
|
|
+ sum = do_usad(a, b);
|
|
|
+ sum += do_usad(a >> 8, b >> 8);
|
|
|
+ sum += do_usad(a >> 16, b >> 16);
|
|
|
+ sum += do_usad(a >> 24, b >> 24);
|
|
|
+ return sum;
|
|
|
+}
|
|
|
+
|
|
|
+/* For ARMv6 SEL instruction. */
|
|
|
+uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
|
|
|
+{
|
|
|
+ uint32_t mask;
|
|
|
+
|
|
|
+ mask = 0;
|
|
|
+ if (flags & 1) {
|
|
|
+ mask |= 0xff;
|
|
|
+ }
|
|
|
+ if (flags & 2) {
|
|
|
+ mask |= 0xff00;
|
|
|
+ }
|
|
|
+ if (flags & 4) {
|
|
|
+ mask |= 0xff0000;
|
|
|
+ }
|
|
|
+ if (flags & 8) {
|
|
|
+ mask |= 0xff000000;
|
|
|
+ }
|
|
|
+ return (a & mask) | (b & ~mask);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * CRC helpers.
|
|
|
+ * The upper bytes of val (above the number specified by 'bytes') must have
|
|
|
+ * been zeroed out by the caller.
|
|
|
+ */
|
|
|
+uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
|
|
|
+{
|
|
|
+ uint8_t buf[4];
|
|
|
+
|
|
|
+ stl_le_p(buf, val);
|
|
|
+
|
|
|
+ /* zlib crc32 converts the accumulator and output to one's complement. */
|
|
|
+ return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
|
|
|
+}
|
|
|
+
|
|
|
+uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
|
|
|
+{
|
|
|
+ uint8_t buf[4];
|
|
|
+
|
|
|
+ stl_le_p(buf, val);
|
|
|
+
|
|
|
+ /* Linux crc32c converts the output to one's complement. */
|
|
|
+ return crc32c(acc, buf, bytes) ^ 0xffffffff;
|
|
|
+}
|