12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744 |
- /*
- * Loongson Multimedia Instruction emulation helpers for QEMU.
- *
- * Copyright (c) 2011 Richard Henderson <rth@twiddle.net>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
- #include "cpu.h"
- #include "helper.h"
/*
 * A 64-bit multimedia value seen as packed lanes of different widths.
 *
 * When every lane ("column") is processed identically, host byte order
 * is irrelevant and the arrays can be indexed directly.  When the position
 * of a lane matters, code generally does not rely on the raw in-memory
 * layout of this union.
 */
typedef union {
    uint8_t  ub[8];   /* eight unsigned bytes */
    int8_t   sb[8];   /* eight signed bytes */
    uint16_t uh[4];   /* four unsigned halfwords */
    int16_t  sh[4];   /* four signed halfwords */
    uint32_t uw[2];   /* two unsigned words */
    int32_t  sw[2];   /* two signed words */
    uint64_t d;       /* the whole doubleword */
} LMIValue;
/*
 * Some byte ordering issues can be mitigated by XORing in the following.
 * Expands to N on big-endian hosts and to 0 otherwise.
 */
#ifdef HOST_WORDS_BIGENDIAN
# define BYTE_ORDER_XOR(N) (N)
#else
# define BYTE_ORDER_XOR(N) 0
#endif

/*
 * Saturation helpers.  The argument is fully parenthesized so that
 * compound expressions (e.g. "a | b") expand correctly; it is still
 * evaluated more than once, so it must be free of side effects.
 *
 * SATSB/SATSH/SATSW clamp to the signed byte/halfword/word range.
 * SATUB/SATUH/SATUW clamp only the upper bound; negative inputs are
 * passed through unchanged.
 */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))
#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))
#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
- uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 8; ++i) {
- int r = vs.sb[i] + vt.sb[i];
- vs.sb[i] = SATSB(r);
- }
- return vs.d;
- }
- uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 8; ++i) {
- int r = vs.ub[i] + vt.ub[i];
- vs.ub[i] = SATUB(r);
- }
- return vs.d;
- }
- uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; ++i) {
- int r = vs.sh[i] + vt.sh[i];
- vs.sh[i] = SATSH(r);
- }
- return vs.d;
- }
- uint64_t helper_paddush(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; ++i) {
- int r = vs.uh[i] + vt.uh[i];
- vs.uh[i] = SATUH(r);
- }
- return vs.d;
- }
- uint64_t helper_paddb(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 8; ++i) {
- vs.ub[i] += vt.ub[i];
- }
- return vs.d;
- }
- uint64_t helper_paddh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; ++i) {
- vs.uh[i] += vt.uh[i];
- }
- return vs.d;
- }
- uint64_t helper_paddw(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 2; ++i) {
- vs.uw[i] += vt.uw[i];
- }
- return vs.d;
- }
- uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 8; ++i) {
- int r = vs.sb[i] - vt.sb[i];
- vs.sb[i] = SATSB(r);
- }
- return vs.d;
- }
/*
 * Packed subtract (fs - ft) of eight unsigned bytes with unsigned
 * saturation.
 *
 * A lane whose subtrahend is larger than its minuend saturates to 0
 * instead of wrapping around.  The difference of two bytes can never
 * exceed 0xff, so only the lower bound needs clamping.
 *
 * Lanes are extracted with shifts, so the result does not depend on the
 * host byte order.
 */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        unsigned int shift = i * 8;
        int r = (int)((fs >> shift) & 0xff) - (int)((ft >> shift) & 0xff);

        if (r < 0) {
            r = 0;
        }
        fd |= (uint64_t)r << shift;
    }
    return fd;
}
- uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; ++i) {
- int r = vs.sh[i] - vt.sh[i];
- vs.sh[i] = SATSH(r);
- }
- return vs.d;
- }
/*
 * Packed subtract (fs - ft) of four unsigned halfwords with unsigned
 * saturation.
 *
 * A lane whose subtrahend is larger than its minuend saturates to 0
 * instead of wrapping around.  The difference of two halfwords can never
 * exceed 0xffff, so only the lower bound needs clamping.
 *
 * Lanes are extracted with shifts, so the result does not depend on the
 * host byte order.
 */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        unsigned int shift = i * 16;
        int r = (int)((fs >> shift) & 0xffff) - (int)((ft >> shift) & 0xffff);

        if (r < 0) {
            r = 0;
        }
        fd |= (uint64_t)r << shift;
    }
    return fd;
}
- uint64_t helper_psubb(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 8; ++i) {
- vs.ub[i] -= vt.ub[i];
- }
- return vs.d;
- }
- uint64_t helper_psubh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; ++i) {
- vs.uh[i] -= vt.uh[i];
- }
- return vs.d;
- }
- uint64_t helper_psubw(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned int i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 2; ++i) {
- vs.uw[i] -= vt.uw[i];
- }
- return vs.d;
- }
- uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
- {
- unsigned host = BYTE_ORDER_XOR(3);
- LMIValue vd, vs;
- unsigned i;
- vs.d = fs;
- vd.d = 0;
- for (i = 0; i < 4; i++, ft >>= 2) {
- vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
- }
- return vd.d;
- }
/*
 * Pack four signed words into four signed halfwords with signed
 * saturation.
 *
 * Result halfwords, from least to most significant: low word of fs,
 * high word of fs, low word of ft, high word of ft, each clamped to
 * [-0x8000, 0x7fff].
 *
 * The saturated value is widened to an unsigned 64-bit quantity before
 * being shifted into place, so that a halfword with bit 15 set never
 * overflows a signed type when moved to bits 48..63.
 */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        uint64_t src = i < 2 ? fs : ft;
        int32_t w = (int32_t)(src >> ((i % 2) * 32));

        if (w < -0x8000) {
            w = -0x8000;
        } else if (w > 0x7fff) {
            w = 0x7fff;
        }
        fd |= (uint64_t)(w & 0xffff) << (i * 16);
    }
    return fd;
}
/*
 * Pack eight signed halfwords into eight signed bytes with signed
 * saturation.  The four halfwords of fs fill result bytes 0..3 and the
 * four halfwords of ft fill result bytes 4..7; each value is clamped to
 * [-0x80, 0x7f].
 */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    uint64_t packed = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; lane++) {
        uint64_t src = lane < 4 ? fs : ft;
        int16_t half = src >> ((lane & 3) * 16);

        if (half < -0x80) {
            half = -0x80;
        } else if (half > 0x7f) {
            half = 0x7f;
        }
        packed |= (uint64_t)(half & 0xff) << (lane * 8);
    }
    return packed;
}
/*
 * Pack eight signed halfwords into eight unsigned bytes with unsigned
 * saturation.  The four halfwords of fs fill result bytes 0..3 and the
 * four halfwords of ft fill result bytes 4..7.
 *
 * Each signed halfword is clamped to [0, 0xff]: negative inputs
 * saturate to 0 rather than leaking their low byte into the result.
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        uint64_t src = i < 4 ? fs : ft;
        int tmp = (int16_t)(src >> ((i % 4) * 16));

        if (tmp < 0) {
            tmp = 0;
        } else if (tmp > 0xff) {
            tmp = 0xff;
        }
        fd |= (uint64_t)tmp << (i * 8);
    }
    return fd;
}
/*
 * Interleave the low words: the result has the low word of fs in bits
 * 0..31 and the low word of ft in bits 32..63.
 */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low = fs & 0xffffffff;
    const uint64_t high = ft << 32;

    return low | high;
}
/*
 * Interleave the high words: the result has the high word of fs in bits
 * 0..31 and the high word of ft in bits 32..63.
 */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low = fs >> 32;
    const uint64_t high = ft & ~0xffffffffull;

    return low | high;
}
- uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
- {
- unsigned host = BYTE_ORDER_XOR(3);
- LMIValue vd, vs, vt;
- vs.d = fs;
- vt.d = ft;
- vd.uh[0 ^ host] = vs.uh[0 ^ host];
- vd.uh[1 ^ host] = vt.uh[0 ^ host];
- vd.uh[2 ^ host] = vs.uh[1 ^ host];
- vd.uh[3 ^ host] = vt.uh[1 ^ host];
- return vd.d;
- }
- uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
- {
- unsigned host = BYTE_ORDER_XOR(3);
- LMIValue vd, vs, vt;
- vs.d = fs;
- vt.d = ft;
- vd.uh[0 ^ host] = vs.uh[2 ^ host];
- vd.uh[1 ^ host] = vt.uh[2 ^ host];
- vd.uh[2 ^ host] = vs.uh[3 ^ host];
- vd.uh[3 ^ host] = vt.uh[3 ^ host];
- return vd.d;
- }
- uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
- {
- unsigned host = BYTE_ORDER_XOR(7);
- LMIValue vd, vs, vt;
- vs.d = fs;
- vt.d = ft;
- vd.ub[0 ^ host] = vs.ub[0 ^ host];
- vd.ub[1 ^ host] = vt.ub[0 ^ host];
- vd.ub[2 ^ host] = vs.ub[1 ^ host];
- vd.ub[3 ^ host] = vt.ub[1 ^ host];
- vd.ub[4 ^ host] = vs.ub[2 ^ host];
- vd.ub[5 ^ host] = vt.ub[2 ^ host];
- vd.ub[6 ^ host] = vs.ub[3 ^ host];
- vd.ub[7 ^ host] = vt.ub[3 ^ host];
- return vd.d;
- }
- uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
- {
- unsigned host = BYTE_ORDER_XOR(7);
- LMIValue vd, vs, vt;
- vs.d = fs;
- vt.d = ft;
- vd.ub[0 ^ host] = vs.ub[4 ^ host];
- vd.ub[1 ^ host] = vt.ub[4 ^ host];
- vd.ub[2 ^ host] = vs.ub[5 ^ host];
- vd.ub[3 ^ host] = vt.ub[5 ^ host];
- vd.ub[4 ^ host] = vs.ub[6 ^ host];
- vd.ub[5 ^ host] = vt.ub[6 ^ host];
- vd.ub[6 ^ host] = vs.ub[7 ^ host];
- vd.ub[7 ^ host] = vt.ub[7 ^ host];
- return vd.d;
- }
- uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; i++) {
- vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
- }
- return vs.d;
- }
- uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 8; i++) {
- vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
- }
- return vs.d;
- }
- uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; i++) {
- vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
- }
- return vs.d;
- }
- uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; i++) {
- vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
- }
- return vs.d;
- }
/*
 * Packed maximum of eight unsigned bytes.
 *
 * All eight byte lanes of the 64-bit operands are compared; stopping
 * after four lanes would leave the remaining bytes as a plain copy of
 * fs.  Lanes are extracted with shifts, so the result does not depend
 * on the host byte order.
 */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; i++) {
        unsigned shift = i * 8;
        uint8_t a = fs >> shift;
        uint8_t b = ft >> shift;

        fd |= (uint64_t)(a >= b ? a : b) << shift;
    }
    return fd;
}
/*
 * Packed minimum of eight unsigned bytes.
 *
 * All eight byte lanes of the 64-bit operands are compared; stopping
 * after four lanes would leave the remaining bytes as a plain copy of
 * fs.  Lanes are extracted with shifts, so the result does not depend
 * on the host byte order.
 */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; i++) {
        unsigned shift = i * 8;
        uint8_t a = fs >> shift;
        uint8_t b = ft >> shift;

        fd |= (uint64_t)(a <= b ? a : b) << shift;
    }
    return fd;
}
- uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 2; i++) {
- vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
- }
- return vs.d;
- }
/*
 * Packed compare-greater-than of two words: a lane becomes all ones
 * when the fs lane is greater than the ft lane and zero otherwise.
 *
 * The lanes are compared as signed 32-bit integers.
 * NOTE(review): signedness follows the MMX-style PCMPGT semantics that
 * these instructions mirror -- confirm against the Loongson manual.
 *
 * Lanes are extracted with shifts, so the result does not depend on the
 * host byte order.
 */
uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 2; i++) {
        unsigned shift = i * 32;
        int32_t a = (int32_t)(uint32_t)(fs >> shift);
        int32_t b = (int32_t)(uint32_t)(ft >> shift);

        if (a > b) {
            fd |= (uint64_t)0xffffffffu << shift;
        }
    }
    return fd;
}
- uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; i++) {
- vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
- }
- return vs.d;
- }
/*
 * Packed compare-greater-than of four halfwords: a lane becomes all
 * ones when the fs lane is greater than the ft lane and zero otherwise.
 *
 * The lanes are compared as signed 16-bit integers.
 * NOTE(review): signedness follows the MMX-style PCMPGT semantics that
 * these instructions mirror -- confirm against the Loongson manual.
 *
 * Lanes are extracted with shifts, so the result does not depend on the
 * host byte order.
 */
uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; i++) {
        unsigned shift = i * 16;
        int16_t a = (int16_t)(uint16_t)(fs >> shift);
        int16_t b = (int16_t)(uint16_t)(ft >> shift);

        if (a > b) {
            fd |= (uint64_t)0xffff << shift;
        }
    }
    return fd;
}
- uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 8; i++) {
- vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
- }
- return vs.d;
- }
/*
 * Packed compare-greater-than of eight bytes: a lane becomes all ones
 * when the fs lane is greater than the ft lane and zero otherwise.
 *
 * The lanes are compared as signed 8-bit integers.
 * NOTE(review): signedness follows the MMX-style PCMPGT semantics that
 * these instructions mirror -- confirm against the Loongson manual.
 *
 * Lanes are extracted with shifts, so the result does not depend on the
 * host byte order.
 */
uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 8; i++) {
        unsigned shift = i * 8;
        int8_t a = (int8_t)(uint8_t)(fs >> shift);
        int8_t b = (int8_t)(uint8_t)(ft >> shift);

        if (a > b) {
            fd |= (uint64_t)0xff << shift;
        }
    }
    return fd;
}
- uint64_t helper_psllw(uint64_t fs, uint64_t ft)
- {
- LMIValue vs;
- unsigned i;
- ft &= 0x7f;
- if (ft > 31) {
- return 0;
- }
- vs.d = fs;
- for (i = 0; i < 2; ++i) {
- vs.uw[i] <<= ft;
- }
- return vs.d;
- }
- uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
- {
- LMIValue vs;
- unsigned i;
- ft &= 0x7f;
- if (ft > 31) {
- return 0;
- }
- vs.d = fs;
- for (i = 0; i < 2; ++i) {
- vs.uw[i] >>= ft;
- }
- return vs.d;
- }
- uint64_t helper_psraw(uint64_t fs, uint64_t ft)
- {
- LMIValue vs;
- unsigned i;
- ft &= 0x7f;
- if (ft > 31) {
- ft = 31;
- }
- vs.d = fs;
- for (i = 0; i < 2; ++i) {
- vs.sw[i] >>= ft;
- }
- return vs.d;
- }
- uint64_t helper_psllh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs;
- unsigned i;
- ft &= 0x7f;
- if (ft > 15) {
- return 0;
- }
- vs.d = fs;
- for (i = 0; i < 4; ++i) {
- vs.uh[i] <<= ft;
- }
- return vs.d;
- }
- uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs;
- unsigned i;
- ft &= 0x7f;
- if (ft > 15) {
- return 0;
- }
- vs.d = fs;
- for (i = 0; i < 4; ++i) {
- vs.uh[i] >>= ft;
- }
- return vs.d;
- }
- uint64_t helper_psrah(uint64_t fs, uint64_t ft)
- {
- LMIValue vs;
- unsigned i;
- ft &= 0x7f;
- if (ft > 15) {
- ft = 15;
- }
- vs.d = fs;
- for (i = 0; i < 4; ++i) {
- vs.sh[i] >>= ft;
- }
- return vs.d;
- }
- uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; ++i) {
- vs.sh[i] *= vt.sh[i];
- }
- return vs.d;
- }
- uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 4; ++i) {
- int32_t r = vs.sh[i] * vt.sh[i];
- vs.sh[i] = r >> 16;
- }
- return vs.d;
- }
/*
 * Packed multiply of four unsigned halfwords, keeping the high 16 bits
 * of each 32-bit unsigned product.
 *
 * The operands are widened to uint32_t before multiplying.  Multiplying
 * two uint16_t values directly would promote them to (signed) int, and
 * products such as 0xffff * 0xffff do not fit in a 32-bit int.
 *
 * Lanes are extracted with shifts, so the result does not depend on the
 * host byte order.
 */
uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned i;

    for (i = 0; i < 4; ++i) {
        unsigned shift = i * 16;
        uint32_t a = (fs >> shift) & 0xffff;
        uint32_t b = (ft >> shift) & 0xffff;
        uint32_t r = a * b;

        fd |= (uint64_t)(r >> 16) << shift;
    }
    return fd;
}
- uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
- {
- unsigned host = BYTE_ORDER_XOR(3);
- LMIValue vs, vt;
- uint32_t p0, p1;
- vs.d = fs;
- vt.d = ft;
- p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host];
- p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
- p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host];
- p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
- return ((uint64_t)p1 << 32) | p0;
- }
- uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
- {
- LMIValue vs, vt;
- unsigned i;
- vs.d = fs;
- vt.d = ft;
- for (i = 0; i < 8; ++i) {
- int r = vs.ub[i] - vt.ub[i];
- vs.ub[i] = (r < 0 ? -r : r);
- }
- return vs.d;
- }
/*
 * Sum the eight unsigned bytes of fs and return the total, masked to
 * 16 bits.
 */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned total = 0;

    /* Consume fs one byte at a time, starting with the lowest byte. */
    while (fs != 0) {
        total += fs & 0xff;
        fs >>= 8;
    }
    return total & 0xffff;
}
/*
 * Collect the most-significant bit of each of the eight bytes of fs into
 * an 8-bit mask: bit n of the result is bit 7 of byte n.
 */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned mask = 0;
    unsigned lane;

    for (lane = 0; lane < 8; lane++) {
        mask |= ((fs >> (lane * 8 + 7)) & 1) << lane;
    }
    return mask & 0xff;
}
|