translate-neon.c.inc
/*
 * ARM translation: AArch32 Neon instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */

static inline int plus1(DisasContext *s, int x)
{
    return x + 1;
}

static inline int rsub_64(DisasContext *s, int x)
{
    return 64 - x;
}

static inline int rsub_32(DisasContext *s, int x)
{
    return 32 - x;
}

static inline int rsub_16(DisasContext *s, int x)
{
    return 16 - x;
}

static inline int rsub_8(DisasContext *s, int x)
{
    return 8 - x;
}

/* Include the generated Neon decoder */
#include "decode-neon-dp.c.inc"
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"

/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
 * where 0 is the least significant end of the register.
 */
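/*
 * Worked example of the HOST_WORDS_BIGENDIAN adjustment below: byte
 * element 3 (size MO_8) has ofs = 3 on a little-endian host; on a
 * big-endian host that byte lives at offset 7 - 3 = 4 within its
 * 8-byte unit, which is exactly 3 ^ (8 - 1).
 */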
static inline long
neon_element_offset(int reg, int element, MemOp size)
{
    int element_size = 1 << size;
    int ofs = element * element_size;
#ifdef HOST_WORDS_BIGENDIAN
    /* Calculate the offset assuming fully little-endian,
     * then XOR to account for the order of the 8-byte units.
     */
    if (element_size < 8) {
        ofs ^= 8 - element_size;
    }
#endif
    return neon_reg_offset(reg, 0) + ofs;
}

static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i32(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i64(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i64(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(var, cpu_env, offset);
        break;
    case MO_Q:
        tcg_gen_ld_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i32(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i32(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i64(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i64(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st32_i64(var, cpu_env, offset);
        break;
    case MO_64:
        tcg_gen_st_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = get_fpstatus_ptr(1);
    fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = get_fpstatus_ptr(1);
    fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
{
    int opr_sz;
    gen_helper_gvec_3 *fn_gvec;

    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
    tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       opr_sz, opr_sz, 0, fn_gvec);
    return true;
}

static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}

static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
    gen_helper_gvec_3_ptr *fn_gvec_ptr;
    int opr_sz;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vn) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx
                   : gen_helper_gvec_fcmlah_idx);
    opr_sz = (1 + a->q) * 8;
    fpst = get_fpstatus_ptr(1);
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz,
                       (a->index << 2) | a->rot, fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
{
    gen_helper_gvec_3 *fn_gvec;
    int opr_sz;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vn) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
    opr_sz = (1 + a->q) * 8;
    fpst = get_fpstatus_ptr(1);
    tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->rm),
                       opr_sz, opr_sz, a->index, fn_gvec);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       cpu_env, opr_sz, opr_sz,
                       (a->index << 2) | a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}

static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
};
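/*
 * neon_ls_element_type is indexed by the 'itype' field of the VLD/VST
 * "multiple structures" encodings; trans_VLDST_multiple() below uses
 * nregs, interleave and spacing to drive its register and element loops.
 */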
static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
{
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            tcg_temp_free_i32(index);
        }
        store_reg(s, rn, base);
    }
}
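/*
 * Note on gen_neon_ldst_base_update() above: rm == 15 encodes "no
 * writeback", rm == 13 encodes "writeback by the transfer size"
 * (the stride argument), and any other rm adds that index register
 * to the base.
 */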
static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp endian = s->be_data;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian. */
    if (size == 0) {
        endian = MO_LE;
    }
    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        size = 3;
    }
    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    tmp = tcg_const_i32(1 << size);
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
                }
                tcg_gen_add_i32(addr, addr, tmp);
            }
        }
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(tmp64);

    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}

static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = 2;
    }
    if (nregs == 1 && a->a == 1 && size == 0) {
        return false;
    }
    if (nregs == 3 && a->a == 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                        s->be_data | size);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
                                 8, 8, tmp);
            tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
                             neon_reg_offset(vd, 0), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
                                 vec_size, vec_size, tmp);
        }
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}

static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
    /* Neon load/store single structure to one lane */
    int reg;
    int nregs = a->n + 1;
    int vd = a->vd;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    switch (nregs) {
    case 1:
        if (((a->align & (1 << a->size)) != 0) ||
            (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) {
            return false;
        }
        break;
    case 3:
        if ((a->align & 1) != 0) {
            return false;
        }
        /* fall through */
    case 2:
        if (a->size == 2 && (a->align & 2) != 0) {
            return false;
        }
        break;
    case 4:
        if ((a->size == 2) && ((a->align & 3) == 3)) {
            return false;
        }
        break;
    default:
        abort();
    }
    if ((vd + a->stride * (nregs - 1)) > 31) {
        /*
         * Attempts to write off the end of the register file are
         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
         * access off the end of the array that holds the register data.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    /*
     * TODO: if we implemented alignment exceptions, we should check
     * addr against the alignment encoded in a->align here.
     */
    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                            s->be_data | a->size);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
                            s->be_data | a->size);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}

static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rn_ofs = neon_reg_offset(a->vn, 0);
    int rm_ofs = neon_reg_offset(a->vm, 0);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}

#define DO_3SAME(INSN, FUNC) \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        return do_3same(s, a, FUNC); \
    }
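/*
 * As an illustration, DO_3SAME(VADD, tcg_gen_gvec_add) below expands to:
 *
 *   static bool trans_VADD_3s(DisasContext *s, arg_3same *a)
 *   {
 *       return do_3same(s, a, tcg_gen_gvec_add);
 *   }
 */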
DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)

/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
    } \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
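/*
 * With tcg_gen_gvec_bitsel's first source operand acting as the select
 * mask, the orderings above give: VBSL selects on Vd, while VBIT and
 * VBIF select on Vm (VBIT taking Vn where the mask is set, VBIF where
 * it is clear).
 */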
#define DO_3SAME_NO_SZ_3(INSN, FUNC) \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size == 3) { \
            return false; \
        } \
        return do_3same(s, a, FUNC); \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)

#define DO_3SAME_CMP(INSN, COND) \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    } \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)

#define WRAP_OOL_FN(WRAPNAME, FUNC) \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
    { \
        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    }

WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)

static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_3same(s, a, gen_VMUL_p_3s);
}

#define DO_VQRDMLAH(INSN, FUNC) \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (!dc_isar_feature(aa32_rdm, s)) { \
            return false; \
        } \
        if (a->size != 1 && a->size != 2) { \
            return false; \
        } \
        return do_3same(s, a, FUNC); \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)

#define DO_SHA1(NAME, FUNC) \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (!dc_isar_feature(aa32_sha1, s)) { \
            return false; \
        } \
        return do_3same(s, a, gen_##NAME##_3s); \
    }

DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)

#define DO_SHA2(NAME, FUNC) \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (!dc_isar_feature(aa32_sha2, s)) { \
            return false; \
        } \
        return do_3same(s, a, gen_##NAME##_3s); \
    }

DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)

#define DO_3SAME_64(INSN, FUNC) \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        static const GVecGen3 op = { .fni8 = FUNC }; \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
    } \
    DO_3SAME(INSN, gen_##INSN##_3s)

#define DO_3SAME_64_ENV(INSN, FUNC) \
    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
    { \
        FUNC(d, cpu_env, n, m); \
    } \
    DO_3SAME_64(INSN, gen_##INSN##_elt)

DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)

#define DO_3SAME_32(INSN, FUNC) \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        static const GVecGen3 ops[4] = { \
            { .fni4 = gen_helper_neon_##FUNC##8 }, \
            { .fni4 = gen_helper_neon_##FUNC##16 }, \
            { .fni4 = gen_helper_neon_##FUNC##32 }, \
            { 0 }, \
        }; \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    } \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size > 2) { \
            return false; \
        } \
        return do_3same(s, a, gen_##INSN##_3s); \
    }

/*
 * Some helper functions need to be passed the cpu_env. In order
 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 * and which call a NeonGenTwoOpEnvFn().
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC) \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \
    { \
        FUNC(d, cpu_env, n, m); \
    }
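/*
 * For instance, DO_3SAME_32_ENV(VQSHL_S, qshl_s) below uses this to
 * produce gen_VQSHL_S_tramp8(), a NeonGenTwoOpFn that simply calls
 * gen_helper_neon_qshl_s8(d, cpu_env, n, m).
 */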
#define DO_3SAME_32_ENV(INSN, FUNC) \
    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        static const GVecGen3 ops[4] = { \
            { .fni4 = gen_##INSN##_tramp8 }, \
            { .fni4 = gen_##INSN##_tramp16 }, \
            { .fni4 = gen_##INSN##_tramp32 }, \
            { 0 }, \
        }; \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    } \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size > 2) { \
            return false; \
        } \
        return do_3same(s, a, gen_##INSN##_3s); \
    }

DO_3SAME_32(VHADD_S, hadd_s)
DO_3SAME_32(VHADD_U, hadd_u)
DO_3SAME_32(VHSUB_S, hsub_s)
DO_3SAME_32(VHSUB_U, hsub_u)
DO_3SAME_32(VRHADD_S, rhadd_s)
DO_3SAME_32(VRHADD_U, rhadd_u)
DO_3SAME_32(VRSHL_S, rshl_s)
DO_3SAME_32(VRSHL_U, rshl_u)

DO_3SAME_32_ENV(VQSHL_S, qshl_s)
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)

static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
{
    /* Operations handled pairwise 32 bits at a time */
    TCGv_i32 tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    /*
     * Note that we have to be careful not to clobber the source operands
     * in the "vm == vd" case by storing the result of the first pass too
     * early. Since Q is 0 there are always just two passes, so instead
     * of a complicated loop over each pass we just unroll.
     */
    tmp = neon_load_reg(a->vn, 0);
    tmp2 = neon_load_reg(a->vn, 1);
    fn(tmp, tmp, tmp2);
    tcg_temp_free_i32(tmp2);

    tmp3 = neon_load_reg(a->vm, 0);
    tmp2 = neon_load_reg(a->vm, 1);
    fn(tmp3, tmp3, tmp2);
    tcg_temp_free_i32(tmp2);

    neon_store_reg(a->vd, 0, tmp);
    neon_store_reg(a->vd, 1, tmp3);
    return true;
}

#define DO_3SAME_PAIR(INSN, func) \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        static NeonGenTwoOpFn * const fns[] = { \
            gen_helper_neon_##func##8, \
            gen_helper_neon_##func##16, \
            gen_helper_neon_##func##32, \
        }; \
        if (a->size > 2) { \
            return false; \
        } \
        return do_3same_pair(s, a, fns[a->size]); \
    }

/* 32-bit pairwise ops end up the same as the elementwise versions. */
#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
#define gen_helper_neon_padd_u32 tcg_gen_add_i32

DO_3SAME_PAIR(VPMAX_S, pmax_s)
DO_3SAME_PAIR(VPMIN_S, pmin_s)
DO_3SAME_PAIR(VPMAX_U, pmax_u)
DO_3SAME_PAIR(VPMIN_U, pmin_u)
DO_3SAME_PAIR(VPADD, padd_u)

#define DO_3SAME_VQDMULH(INSN, FUNC) \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        static const GVecGen3 ops[2] = { \
            { .fni4 = gen_##INSN##_tramp16 }, \
            { .fni4 = gen_##INSN##_tramp32 }, \
        }; \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
    } \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size != 1 && a->size != 2) { \
            return false; \
        } \
        return do_3same(s, a, gen_##INSN##_3s); \
    }

DO_3SAME_VQDMULH(VQDMULH, qdmulh)
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)

static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn,
                        bool reads_vd)
{
    /*
     * FP operations handled elementwise 32 bits at a time.
     * If reads_vd is true then the old value of Vd will be
     * loaded before calling the callback function. This is
     * used for multiply-accumulate type operations.
     */
    TCGv_i32 tmp, tmp2;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    TCGv_ptr fpstatus = get_fpstatus_ptr(1);
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        tmp = neon_load_reg(a->vn, pass);
        tmp2 = neon_load_reg(a->vm, pass);
        if (reads_vd) {
            TCGv_i32 tmp_rd = neon_load_reg(a->vd, pass);
            fn(tmp_rd, tmp, tmp2, fpstatus);
            neon_store_reg(a->vd, pass, tmp_rd);
            tcg_temp_free_i32(tmp);
        } else {
            fn(tmp, tmp, tmp2, fpstatus);
            neon_store_reg(a->vd, pass, tmp);
        }
        tcg_temp_free_i32(tmp2);
    }
    tcg_temp_free_ptr(fpstatus);
    return true;
}

/*
 * For all the functions using this macro, size == 1 means fp16,
 * which is an architecture extension we don't implement yet.
 */
#define DO_3S_FP_GVEC(INSN,FUNC) \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
                                uint32_t rn_ofs, uint32_t rm_ofs, \
                                uint32_t oprsz, uint32_t maxsz) \
    { \
        TCGv_ptr fpst = get_fpstatus_ptr(1); \
        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
                           oprsz, maxsz, 0, FUNC); \
        tcg_temp_free_ptr(fpst); \
    } \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size != 0) { \
            /* TODO fp16 support */ \
            return false; \
        } \
        return do_3same(s, a, gen_##INSN##_3s); \
    }

DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s)

/*
 * For all the functions using this macro, size == 1 means fp16,
 * which is an architecture extension we don't implement yet.
 */
#define DO_3S_FP(INSN,FUNC,READS_VD) \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size != 0) { \
            /* TODO fp16 support */ \
            return false; \
        } \
        return do_3same_fp(s, a, FUNC, READS_VD); \
    }

DO_3S_FP(VCEQ, gen_helper_neon_ceq_f32, false)
DO_3S_FP(VCGE, gen_helper_neon_cge_f32, false)
DO_3S_FP(VCGT, gen_helper_neon_cgt_f32, false)
DO_3S_FP(VACGE, gen_helper_neon_acge_f32, false)
DO_3S_FP(VACGT, gen_helper_neon_acgt_f32, false)
DO_3S_FP(VMAX, gen_helper_vfp_maxs, false)
DO_3S_FP(VMIN, gen_helper_vfp_mins, false)

static void gen_VMLA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
                           TCGv_ptr fpstatus)
{
    gen_helper_vfp_muls(vn, vn, vm, fpstatus);
    gen_helper_vfp_adds(vd, vd, vn, fpstatus);
}

static void gen_VMLS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
                           TCGv_ptr fpstatus)
{
    gen_helper_vfp_muls(vn, vn, vm, fpstatus);
    gen_helper_vfp_subs(vd, vd, vn, fpstatus);
}

DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)
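/*
 * gen_VMLA_fp_3s() and gen_VMLS_fp_3s() reuse vn as scratch space for
 * the intermediate product; this is safe because do_3same_fp() passes
 * per-pass temporaries loaded with neon_load_reg(), not the
 * architectural register values themselves.
 */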
static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size != 0) {
        /* TODO fp16 support */
        return false;
    }

    return do_3same_fp(s, a, gen_helper_vfp_maxnums, false);
}

static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size != 0) {
        /* TODO fp16 support */
        return false;
    }

    return do_3same_fp(s, a, gen_helper_vfp_minnums, false);
}

WRAP_ENV_FN(gen_VRECPS_tramp, gen_helper_recps_f32)

static void gen_VRECPS_fp_3s(unsigned vece, uint32_t rd_ofs,
                             uint32_t rn_ofs, uint32_t rm_ofs,
                             uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 ops = { .fni4 = gen_VRECPS_tramp };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
}

static bool trans_VRECPS_fp_3s(DisasContext *s, arg_3same *a)
{
    if (a->size != 0) {
        /* TODO fp16 support */
        return false;
    }

    return do_3same(s, a, gen_VRECPS_fp_3s);
}

WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32)

static void gen_VRSQRTS_fp_3s(unsigned vece, uint32_t rd_ofs,
                              uint32_t rn_ofs, uint32_t rm_ofs,
                              uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
}

static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
{
    if (a->size != 0) {
        /* TODO fp16 support */
        return false;
    }

    return do_3same(s, a, gen_VRSQRTS_fp_3s);
}

static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
                           TCGv_ptr fpstatus)
{
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
}

static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!dc_isar_feature(aa32_simdfmac, s)) {
        return false;
    }

    if (a->size != 0) {
        /* TODO fp16 support */
        return false;
    }

    return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
}

static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
                           TCGv_ptr fpstatus)
{
    gen_helper_vfp_negs(vn, vn);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
}

static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!dc_isar_feature(aa32_simdfmac, s)) {
        return false;
    }

    if (a->size != 0) {
        /* TODO fp16 support */
        return false;
    }

    return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
}

static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
{
    /* FP operations handled pairwise 32 bits at a time */
    TCGv_i32 tmp, tmp2, tmp3;
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    /*
     * Note that we have to be careful not to clobber the source operands
     * in the "vm == vd" case by storing the result of the first pass too
     * early. Since Q is 0 there are always just two passes, so instead
     * of a complicated loop over each pass we just unroll.
     */
    fpstatus = get_fpstatus_ptr(1);
    tmp = neon_load_reg(a->vn, 0);
    tmp2 = neon_load_reg(a->vn, 1);
    fn(tmp, tmp, tmp2, fpstatus);
    tcg_temp_free_i32(tmp2);

    tmp3 = neon_load_reg(a->vm, 0);
    tmp2 = neon_load_reg(a->vm, 1);
    fn(tmp3, tmp3, tmp2, fpstatus);
    tcg_temp_free_i32(tmp2);
    tcg_temp_free_ptr(fpstatus);

    neon_store_reg(a->vd, 0, tmp);
    neon_store_reg(a->vd, 1, tmp3);
    return true;
}

/*
 * For all the functions using this macro, size == 1 means fp16,
 * which is an architecture extension we don't implement yet.
 */
#define DO_3S_FP_PAIR(INSN,FUNC) \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
    { \
        if (a->size != 0) { \
            /* TODO fp16 support */ \
            return false; \
        } \
        return do_3same_fp_pair(s, a, FUNC); \
    }

DO_3S_FP_PAIR(VPADD, gen_helper_vfp_adds)
DO_3S_FP_PAIR(VPMAX, gen_helper_vfp_maxs)
DO_3S_FP_PAIR(VPMIN, gen_helper_vfp_mins)

static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
    /* Handle a 2-reg-shift insn which can be vectorized. */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rm_ofs = neon_reg_offset(a->vm, 0);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
    return true;
}

#define DO_2SH(INSN, FUNC) \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
    { \
        return do_vector_2sh(s, a, FUNC); \
    } \

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)

static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Signed shift out of range results in all-sign-bits */
    a->shift = MIN(a->shift, (8 << a->size) - 1);
    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
}
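/*
 * Example for the clamp in trans_VSHR_S_2sh() above: VSHR.S8 can encode
 * a shift of 8, which is reduced to 7 here; an arithmetic shift by 7
 * still yields the architecturally required all-sign-bits result.
 */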
static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
}

static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Shift out of range is architecturally valid and results in zero. */
    if (a->shift >= (8 << a->size)) {
        return do_vector_2sh(s, a, gen_zero_rd_2sh);
    } else {
        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
    }
}

static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwo64OpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size == 3 case, where the
     * function needs to be passed cpu_env.
     */
    TCGv_i64 constimm;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_const_i64(dup_const(a->size, a->shift));

    for (pass = 0; pass < a->q + 1; pass++) {
        TCGv_i64 tmp = tcg_temp_new_i64();

        neon_load_reg64(tmp, a->vm + pass);
        fn(tmp, cpu_env, tmp, constimm);
        neon_store_reg64(tmp, a->vd + pass);
        tcg_temp_free_i64(tmp);
    }
    tcg_temp_free_i64(constimm);
    return true;
}

static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
                             NeonGenTwoOpEnvFn *fn)
{
    /*
     * 2-reg-and-shift operations, size < 3 case, where the
     * helper needs to be passed cpu_env.
     */
    TCGv_i32 constimm;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * To avoid excessive duplication of ops we implement shift
     * by immediate using the variable shift operations.
     */
    constimm = tcg_const_i32(dup_const(a->size, a->shift));

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
        fn(tmp, cpu_env, tmp, constimm);
        neon_store_reg(a->vd, pass, tmp);
    }
    tcg_temp_free_i32(constimm);
    return true;
}

#define DO_2SHIFT_ENV(INSN, FUNC) \
    static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
    { \
        return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
    } \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
    { \
        static NeonGenTwoOpEnvFn * const fns[] = { \
            gen_helper_neon_##FUNC##8, \
            gen_helper_neon_##FUNC##16, \
            gen_helper_neon_##FUNC##32, \
        }; \
        assert(a->size < ARRAY_SIZE(fns)); \
        return do_2shift_env_32(s, a, fns[a->size]); \
    }
  1296. DO_2SHIFT_ENV(VQSHLU, qshlu_s)
  1297. DO_2SHIFT_ENV(VQSHL_U, qshl_u)
  1298. DO_2SHIFT_ENV(VQSHL_S, qshl_s)
  1299. static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
  1300. NeonGenTwo64OpFn *shiftfn,
  1301. NeonGenNarrowEnvFn *narrowfn)
  1302. {
  1303. /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
  1304. TCGv_i64 constimm, rm1, rm2;
  1305. TCGv_i32 rd;
  1306. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  1307. return false;
  1308. }
  1309. /* UNDEF accesses to D16-D31 if they don't exist. */
  1310. if (!dc_isar_feature(aa32_simd_r32, s) &&
  1311. ((a->vd | a->vm) & 0x10)) {
  1312. return false;
  1313. }
  1314. if (a->vm & 1) {
  1315. return false;
  1316. }
  1317. if (!vfp_access_check(s)) {
  1318. return true;
  1319. }
  1320. /*
  1321. * This is always a right shift, and the shiftfn is always a
  1322. * left-shift helper, which thus needs the negated shift count.
  1323. */
  1324. constimm = tcg_const_i64(-a->shift);
  1325. rm1 = tcg_temp_new_i64();
  1326. rm2 = tcg_temp_new_i64();
  1327. /* Load both inputs first to avoid potential overwrite if rm == rd */
  1328. neon_load_reg64(rm1, a->vm);
  1329. neon_load_reg64(rm2, a->vm + 1);
  1330. shiftfn(rm1, rm1, constimm);
  1331. rd = tcg_temp_new_i32();
  1332. narrowfn(rd, cpu_env, rm1);
  1333. neon_store_reg(a->vd, 0, rd);
  1334. shiftfn(rm2, rm2, constimm);
  1335. rd = tcg_temp_new_i32();
  1336. narrowfn(rd, cpu_env, rm2);
  1337. neon_store_reg(a->vd, 1, rd);
  1338. tcg_temp_free_i64(rm1);
  1339. tcg_temp_free_i64(rm2);
  1340. tcg_temp_free_i64(constimm);
  1341. return true;
  1342. }
  1343. static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
  1344. NeonGenTwoOpFn *shiftfn,
  1345. NeonGenNarrowEnvFn *narrowfn)
  1346. {
  1347. /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
  1348. TCGv_i32 constimm, rm1, rm2, rm3, rm4;
  1349. TCGv_i64 rtmp;
  1350. uint32_t imm;
  1351. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  1352. return false;
  1353. }
  1354. /* UNDEF accesses to D16-D31 if they don't exist. */
  1355. if (!dc_isar_feature(aa32_simd_r32, s) &&
  1356. ((a->vd | a->vm) & 0x10)) {
  1357. return false;
  1358. }
  1359. if (a->vm & 1) {
  1360. return false;
  1361. }
  1362. if (!vfp_access_check(s)) {
  1363. return true;
  1364. }
  1365. /*
  1366. * This is always a right shift, and the shiftfn is always a
  1367. * left-shift helper, which thus needs the negated shift count
  1368. * duplicated into each lane of the immediate value.
  1369. */
  1370. if (a->size == 1) {
  1371. imm = (uint16_t)(-a->shift);
  1372. imm |= imm << 16;
  1373. } else {
  1374. /* size == 2 */
  1375. imm = -a->shift;
  1376. }
  1377. constimm = tcg_const_i32(imm);
  1378. /* Load all inputs first to avoid potential overwrite */
  1379. rm1 = neon_load_reg(a->vm, 0);
  1380. rm2 = neon_load_reg(a->vm, 1);
  1381. rm3 = neon_load_reg(a->vm + 1, 0);
  1382. rm4 = neon_load_reg(a->vm + 1, 1);
  1383. rtmp = tcg_temp_new_i64();
  1384. shiftfn(rm1, rm1, constimm);
  1385. shiftfn(rm2, rm2, constimm);
  1386. tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
  1387. tcg_temp_free_i32(rm2);
  1388. narrowfn(rm1, cpu_env, rtmp);
  1389. neon_store_reg(a->vd, 0, rm1);
  1390. shiftfn(rm3, rm3, constimm);
  1391. shiftfn(rm4, rm4, constimm);
  1392. tcg_temp_free_i32(constimm);
  1393. tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
  1394. tcg_temp_free_i32(rm4);
  1395. narrowfn(rm3, cpu_env, rtmp);
  1396. tcg_temp_free_i64(rtmp);
  1397. neon_store_reg(a->vd, 1, rm3);
  1398. return true;
  1399. }
  1400. #define DO_2SN_64(INSN, FUNC, NARROWFUNC) \
  1401. static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
  1402. { \
  1403. return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \
  1404. }
  1405. #define DO_2SN_32(INSN, FUNC, NARROWFUNC) \
  1406. static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
  1407. { \
  1408. return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
  1409. }
  1410. static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
  1411. {
  1412. tcg_gen_extrl_i64_i32(dest, src);
  1413. }
  1414. static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
  1415. {
  1416. gen_helper_neon_narrow_u16(dest, src);
  1417. }
  1418. static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
  1419. {
  1420. gen_helper_neon_narrow_u8(dest, src);
  1421. }
  1422. DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
  1423. DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
  1424. DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
  1425. DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
  1426. DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
  1427. DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
  1428. DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
  1429. DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
  1430. DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
  1431. DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
  1432. DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
  1433. DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
  1434. DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
  1435. DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
  1436. DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
  1437. DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
  1438. DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
  1439. DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
  1440. DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
  1441. DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
  1442. DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
  1443. DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
  1444. DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
  1445. DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
  1446. static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
  1447. NeonGenWidenFn *widenfn, bool u)
  1448. {
  1449. TCGv_i64 tmp;
  1450. TCGv_i32 rm0, rm1;
  1451. uint64_t widen_mask = 0;
  1452. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  1453. return false;
  1454. }
  1455. /* UNDEF accesses to D16-D31 if they don't exist. */
  1456. if (!dc_isar_feature(aa32_simd_r32, s) &&
  1457. ((a->vd | a->vm) & 0x10)) {
  1458. return false;
  1459. }
  1460. if (a->vd & 1) {
  1461. return false;
  1462. }
  1463. if (!vfp_access_check(s)) {
  1464. return true;
  1465. }
  1466. /*
  1467. * This is a widen-and-shift operation. The shift is always less
  1468. * than the width of the source type, so after widening the input
  1469. * vector we can simply shift the whole 64-bit widened register,
  1470. * and then clear the potential overflow bits resulting from left
  1471. * bits of the narrow input appearing as right bits of the left
  1472. * neighbour narrow input. Calculate a mask of bits to clear.
  1473. */
  1474. if ((a->shift != 0) && (a->size < 2 || u)) {
  1475. int esize = 8 << a->size;
  1476. widen_mask = MAKE_64BIT_MASK(0, esize);
  1477. widen_mask >>= esize - a->shift;
  1478. widen_mask = dup_const(a->size + 1, widen_mask);
  1479. }
  1480. rm0 = neon_load_reg(a->vm, 0);
  1481. rm1 = neon_load_reg(a->vm, 1);
  1482. tmp = tcg_temp_new_i64();
  1483. widenfn(tmp, rm0);
  1484. tcg_temp_free_i32(rm0);
  1485. if (a->shift != 0) {
  1486. tcg_gen_shli_i64(tmp, tmp, a->shift);
  1487. tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
  1488. }
  1489. neon_store_reg64(tmp, a->vd);
  1490. widenfn(tmp, rm1);
  1491. tcg_temp_free_i32(rm1);
  1492. if (a->shift != 0) {
  1493. tcg_gen_shli_i64(tmp, tmp, a->shift);
  1494. tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
  1495. }
  1496. neon_store_reg64(tmp, a->vd + 1);
  1497. tcg_temp_free_i64(tmp);
  1498. return true;
  1499. }
  1500. static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
  1501. {
  1502. static NeonGenWidenFn * const widenfn[] = {
  1503. gen_helper_neon_widen_s8,
  1504. gen_helper_neon_widen_s16,
  1505. tcg_gen_ext_i32_i64,
  1506. };
  1507. return do_vshll_2sh(s, a, widenfn[a->size], false);
  1508. }
  1509. static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
  1510. {
  1511. static NeonGenWidenFn * const widenfn[] = {
  1512. gen_helper_neon_widen_u8,
  1513. gen_helper_neon_widen_u16,
  1514. tcg_gen_extu_i32_i64,
  1515. };
  1516. return do_vshll_2sh(s, a, widenfn[a->size], true);
  1517. }
  1518. static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
  1519. NeonGenTwoSingleOpFn *fn)
  1520. {
  1521. /* FP operations in 2-reg-and-shift group */
  1522. TCGv_i32 tmp, shiftv;
  1523. TCGv_ptr fpstatus;
  1524. int pass;
  1525. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  1526. return false;
  1527. }
  1528. /* UNDEF accesses to D16-D31 if they don't exist. */
  1529. if (!dc_isar_feature(aa32_simd_r32, s) &&
  1530. ((a->vd | a->vm) & 0x10)) {
  1531. return false;
  1532. }
  1533. if ((a->vm | a->vd) & a->q) {
  1534. return false;
  1535. }
  1536. if (!vfp_access_check(s)) {
  1537. return true;
  1538. }
  1539. fpstatus = get_fpstatus_ptr(1);
  1540. shiftv = tcg_const_i32(a->shift);
  1541. for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
  1542. tmp = neon_load_reg(a->vm, pass);
  1543. fn(tmp, tmp, shiftv, fpstatus);
  1544. neon_store_reg(a->vd, pass, tmp);
  1545. }
  1546. tcg_temp_free_ptr(fpstatus);
  1547. tcg_temp_free_i32(shiftv);
  1548. return true;
  1549. }
  1550. #define DO_FP_2SH(INSN, FUNC) \
  1551. static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
  1552. { \
  1553. return do_fp_2sh(s, a, FUNC); \
  1554. }
  1555. DO_FP_2SH(VCVT_SF, gen_helper_vfp_sltos)
  1556. DO_FP_2SH(VCVT_UF, gen_helper_vfp_ultos)
  1557. DO_FP_2SH(VCVT_FS, gen_helper_vfp_tosls_round_to_zero)
  1558. DO_FP_2SH(VCVT_FU, gen_helper_vfp_touls_round_to_zero)
  1559. static uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
  1560. {
  1561. /*
  1562. * Expand the encoded constant.
  1563. * Note that cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
  1564. * We choose to not special-case this and will behave as if a
  1565. * valid constant encoding of 0 had been given.
  1566. * cmode = 15 op = 1 must UNDEF; we assume decode has handled that.
  1567. */
  1568. switch (cmode) {
  1569. case 0: case 1:
  1570. /* no-op */
  1571. break;
  1572. case 2: case 3:
  1573. imm <<= 8;
  1574. break;
  1575. case 4: case 5:
  1576. imm <<= 16;
  1577. break;
  1578. case 6: case 7:
  1579. imm <<= 24;
  1580. break;
  1581. case 8: case 9:
  1582. imm |= imm << 16;
  1583. break;
  1584. case 10: case 11:
  1585. imm = (imm << 8) | (imm << 24);
  1586. break;
  1587. case 12:
  1588. imm = (imm << 8) | 0xff;
  1589. break;
  1590. case 13:
  1591. imm = (imm << 16) | 0xffff;
  1592. break;
  1593. case 14:
  1594. if (op) {
  1595. /*
  1596. * This is the only case where the top and bottom 32 bits
  1597. * of the encoded constant differ.
  1598. */
  1599. uint64_t imm64 = 0;
  1600. int n;
  1601. for (n = 0; n < 8; n++) {
  1602. if (imm & (1 << n)) {
  1603. imm64 |= (0xffULL << (n * 8));
  1604. }
  1605. }
  1606. return imm64;
  1607. }
  1608. imm |= (imm << 8) | (imm << 16) | (imm << 24);
  1609. break;
  1610. case 15:
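/*
 * FP immediate: abcdefgh expands to the float32 bit pattern
 * a:NOT(b):bbbbb:cdefgh followed by 19 zero bits.
 */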
  1611. imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
  1612. | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
  1613. break;
  1614. }
  1615. if (op) {
  1616. imm = ~imm;
  1617. }
  1618. return dup_const(MO_32, imm);
  1619. }
  1620. static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
  1621. GVecGen2iFn *fn)
  1622. {
  1623. uint64_t imm;
  1624. int reg_ofs, vec_size;
  1625. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  1626. return false;
  1627. }
  1628. /* UNDEF accesses to D16-D31 if they don't exist. */
  1629. if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
  1630. return false;
  1631. }
  1632. if (a->vd & a->q) {
  1633. return false;
  1634. }
  1635. if (!vfp_access_check(s)) {
  1636. return true;
  1637. }
  1638. reg_ofs = neon_reg_offset(a->vd, 0);
  1639. vec_size = a->q ? 16 : 8;
  1640. imm = asimd_imm_const(a->imm, a->cmode, a->op);
  1641. fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
  1642. return true;
  1643. }
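/*
 * GVecGen2iFn-compatible wrapper for VMOV (immediate): the source
 * offset is ignored and the expanded constant is simply replicated
 * into the destination.
 */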
  1644. static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
  1645. int64_t c, uint32_t oprsz, uint32_t maxsz)
  1646. {
  1647. tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
  1648. }
  1649. static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
  1650. {
  1651. /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
  1652. GVecGen2iFn *fn;
  1653. if ((a->cmode & 1) && a->cmode < 12) {
  1654. /* for op=1, the imm will be inverted, so BIC becomes AND. */
  1655. fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
  1656. } else {
  1657. /* There is one unallocated cmode/op combination in this space */
  1658. if (a->cmode == 15 && a->op == 1) {
  1659. return false;
  1660. }
  1661. fn = gen_VMOV_1r;
  1662. }
  1663. return do_1reg_imm(s, a, fn);
  1664. }
  1665. static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
  1666. NeonGenWidenFn *widenfn,
  1667. NeonGenTwo64OpFn *opfn,
  1668. bool src1_wide)
  1669. {
1670. /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
  1671. TCGv_i64 rn0_64, rn1_64, rm_64;
  1672. TCGv_i32 rm;
  1673. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  1674. return false;
  1675. }
  1676. /* UNDEF accesses to D16-D31 if they don't exist. */
  1677. if (!dc_isar_feature(aa32_simd_r32, s) &&
  1678. ((a->vd | a->vn | a->vm) & 0x10)) {
  1679. return false;
  1680. }
  1681. if (!widenfn || !opfn) {
  1682. /* size == 3 case, which is an entirely different insn group */
  1683. return false;
  1684. }
  1685. if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
  1686. return false;
  1687. }
  1688. if (!vfp_access_check(s)) {
  1689. return true;
  1690. }
  1691. rn0_64 = tcg_temp_new_i64();
  1692. rn1_64 = tcg_temp_new_i64();
  1693. rm_64 = tcg_temp_new_i64();
  1694. if (src1_wide) {
  1695. neon_load_reg64(rn0_64, a->vn);
  1696. } else {
  1697. TCGv_i32 tmp = neon_load_reg(a->vn, 0);
  1698. widenfn(rn0_64, tmp);
  1699. tcg_temp_free_i32(tmp);
  1700. }
  1701. rm = neon_load_reg(a->vm, 0);
  1702. widenfn(rm_64, rm);
  1703. tcg_temp_free_i32(rm);
  1704. opfn(rn0_64, rn0_64, rm_64);
  1705. /*
  1706. * Load second pass inputs before storing the first pass result, to
  1707. * avoid incorrect results if a narrow input overlaps with the result.
  1708. */
  1709. if (src1_wide) {
  1710. neon_load_reg64(rn1_64, a->vn + 1);
  1711. } else {
  1712. TCGv_i32 tmp = neon_load_reg(a->vn, 1);
  1713. widenfn(rn1_64, tmp);
  1714. tcg_temp_free_i32(tmp);
  1715. }
  1716. rm = neon_load_reg(a->vm, 1);
  1717. neon_store_reg64(rn0_64, a->vd);
  1718. widenfn(rm_64, rm);
  1719. tcg_temp_free_i32(rm);
  1720. opfn(rn1_64, rn1_64, rm_64);
  1721. neon_store_reg64(rn1_64, a->vd + 1);
  1722. tcg_temp_free_i64(rn0_64);
  1723. tcg_temp_free_i64(rn1_64);
  1724. tcg_temp_free_i64(rm_64);
  1725. return true;
  1726. }
  1727. #define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
  1728. static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
  1729. { \
  1730. static NeonGenWidenFn * const widenfn[] = { \
  1731. gen_helper_neon_widen_##S##8, \
  1732. gen_helper_neon_widen_##S##16, \
  1733. tcg_gen_##EXT##_i32_i64, \
  1734. NULL, \
  1735. }; \
  1736. static NeonGenTwo64OpFn * const addfn[] = { \
  1737. gen_helper_neon_##OP##l_u16, \
  1738. gen_helper_neon_##OP##l_u32, \
  1739. tcg_gen_##OP##_i64, \
  1740. NULL, \
  1741. }; \
  1742. return do_prewiden_3d(s, a, widenfn[a->size], \
  1743. addfn[a->size], SRC1WIDE); \
  1744. }
  1745. DO_PREWIDEN(VADDL_S, s, ext, add, false)
  1746. DO_PREWIDEN(VADDL_U, u, extu, add, false)
  1747. DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
  1748. DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
  1749. DO_PREWIDEN(VADDW_S, s, ext, add, true)
  1750. DO_PREWIDEN(VADDW_U, u, extu, add, true)
  1751. DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
  1752. DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
  1753. static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
  1754. NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
  1755. {
  1756. /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
  1757. TCGv_i64 rn_64, rm_64;
  1758. TCGv_i32 rd0, rd1;
  1759. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  1760. return false;
  1761. }
  1762. /* UNDEF accesses to D16-D31 if they don't exist. */
  1763. if (!dc_isar_feature(aa32_simd_r32, s) &&
  1764. ((a->vd | a->vn | a->vm) & 0x10)) {
  1765. return false;
  1766. }
  1767. if (!opfn || !narrowfn) {
  1768. /* size == 3 case, which is an entirely different insn group */
  1769. return false;
  1770. }
  1771. if ((a->vn | a->vm) & 1) {
  1772. return false;
  1773. }
  1774. if (!vfp_access_check(s)) {
  1775. return true;
  1776. }
  1777. rn_64 = tcg_temp_new_i64();
  1778. rm_64 = tcg_temp_new_i64();
  1779. rd0 = tcg_temp_new_i32();
  1780. rd1 = tcg_temp_new_i32();
  1781. neon_load_reg64(rn_64, a->vn);
  1782. neon_load_reg64(rm_64, a->vm);
  1783. opfn(rn_64, rn_64, rm_64);
  1784. narrowfn(rd0, rn_64);
  1785. neon_load_reg64(rn_64, a->vn + 1);
  1786. neon_load_reg64(rm_64, a->vm + 1);
  1787. opfn(rn_64, rn_64, rm_64);
  1788. narrowfn(rd1, rn_64);
  1789. neon_store_reg(a->vd, 0, rd0);
  1790. neon_store_reg(a->vd, 1, rd1);
  1791. tcg_temp_free_i64(rn_64);
  1792. tcg_temp_free_i64(rm_64);
  1793. return true;
  1794. }
  1795. #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
  1796. static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
  1797. { \
  1798. static NeonGenTwo64OpFn * const addfn[] = { \
  1799. gen_helper_neon_##OP##l_u16, \
  1800. gen_helper_neon_##OP##l_u32, \
  1801. tcg_gen_##OP##_i64, \
  1802. NULL, \
  1803. }; \
  1804. static NeonGenNarrowFn * const narrowfn[] = { \
  1805. gen_helper_neon_##NARROWTYPE##_high_u8, \
  1806. gen_helper_neon_##NARROWTYPE##_high_u16, \
  1807. EXTOP, \
  1808. NULL, \
  1809. }; \
  1810. return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
  1811. }
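/* Take the rounded high half: add 1 << 31 before extracting bits [63:32] */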
  1812. static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
  1813. {
  1814. tcg_gen_addi_i64(rn, rn, 1u << 31);
  1815. tcg_gen_extrh_i64_i32(rd, rn);
  1816. }
  1817. DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
  1818. DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
  1819. DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
  1820. DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
  1821. static bool do_long_3d(DisasContext *s, arg_3diff *a,
  1822. NeonGenTwoOpWidenFn *opfn,
  1823. NeonGenTwo64OpFn *accfn)
  1824. {
  1825. /*
  1826. * 3-regs different lengths, long operations.
  1827. * These perform an operation on two inputs that returns a double-width
  1828. * result, and then possibly perform an accumulation operation of
  1829. * that result into the double-width destination.
  1830. */
  1831. TCGv_i64 rd0, rd1, tmp;
  1832. TCGv_i32 rn, rm;
  1833. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  1834. return false;
  1835. }
  1836. /* UNDEF accesses to D16-D31 if they don't exist. */
  1837. if (!dc_isar_feature(aa32_simd_r32, s) &&
  1838. ((a->vd | a->vn | a->vm) & 0x10)) {
  1839. return false;
  1840. }
  1841. if (!opfn) {
  1842. /* size == 3 case, which is an entirely different insn group */
  1843. return false;
  1844. }
  1845. if (a->vd & 1) {
  1846. return false;
  1847. }
  1848. if (!vfp_access_check(s)) {
  1849. return true;
  1850. }
  1851. rd0 = tcg_temp_new_i64();
  1852. rd1 = tcg_temp_new_i64();
  1853. rn = neon_load_reg(a->vn, 0);
  1854. rm = neon_load_reg(a->vm, 0);
  1855. opfn(rd0, rn, rm);
  1856. tcg_temp_free_i32(rn);
  1857. tcg_temp_free_i32(rm);
  1858. rn = neon_load_reg(a->vn, 1);
  1859. rm = neon_load_reg(a->vm, 1);
  1860. opfn(rd1, rn, rm);
  1861. tcg_temp_free_i32(rn);
  1862. tcg_temp_free_i32(rm);
  1863. /* Don't store results until after all loads: they might overlap */
  1864. if (accfn) {
  1865. tmp = tcg_temp_new_i64();
  1866. neon_load_reg64(tmp, a->vd);
  1867. accfn(tmp, tmp, rd0);
  1868. neon_store_reg64(tmp, a->vd);
  1869. neon_load_reg64(tmp, a->vd + 1);
  1870. accfn(tmp, tmp, rd1);
  1871. neon_store_reg64(tmp, a->vd + 1);
  1872. tcg_temp_free_i64(tmp);
  1873. } else {
  1874. neon_store_reg64(rd0, a->vd);
  1875. neon_store_reg64(rd1, a->vd + 1);
  1876. }
  1877. tcg_temp_free_i64(rd0);
  1878. tcg_temp_free_i64(rd1);
  1879. return true;
  1880. }
  1881. static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
  1882. {
  1883. static NeonGenTwoOpWidenFn * const opfn[] = {
  1884. gen_helper_neon_abdl_s16,
  1885. gen_helper_neon_abdl_s32,
  1886. gen_helper_neon_abdl_s64,
  1887. NULL,
  1888. };
  1889. return do_long_3d(s, a, opfn[a->size], NULL);
  1890. }
  1891. static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
  1892. {
  1893. static NeonGenTwoOpWidenFn * const opfn[] = {
  1894. gen_helper_neon_abdl_u16,
  1895. gen_helper_neon_abdl_u32,
  1896. gen_helper_neon_abdl_u64,
  1897. NULL,
  1898. };
  1899. return do_long_3d(s, a, opfn[a->size], NULL);
  1900. }
  1901. static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
  1902. {
  1903. static NeonGenTwoOpWidenFn * const opfn[] = {
  1904. gen_helper_neon_abdl_s16,
  1905. gen_helper_neon_abdl_s32,
  1906. gen_helper_neon_abdl_s64,
  1907. NULL,
  1908. };
  1909. static NeonGenTwo64OpFn * const addfn[] = {
  1910. gen_helper_neon_addl_u16,
  1911. gen_helper_neon_addl_u32,
  1912. tcg_gen_add_i64,
  1913. NULL,
  1914. };
  1915. return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
  1916. }
  1917. static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
  1918. {
  1919. static NeonGenTwoOpWidenFn * const opfn[] = {
  1920. gen_helper_neon_abdl_u16,
  1921. gen_helper_neon_abdl_u32,
  1922. gen_helper_neon_abdl_u64,
  1923. NULL,
  1924. };
  1925. static NeonGenTwo64OpFn * const addfn[] = {
  1926. gen_helper_neon_addl_u16,
  1927. gen_helper_neon_addl_u32,
  1928. tcg_gen_add_i64,
  1929. NULL,
  1930. };
  1931. return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
  1932. }
  1933. static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
  1934. {
  1935. TCGv_i32 lo = tcg_temp_new_i32();
  1936. TCGv_i32 hi = tcg_temp_new_i32();
  1937. tcg_gen_muls2_i32(lo, hi, rn, rm);
  1938. tcg_gen_concat_i32_i64(rd, lo, hi);
  1939. tcg_temp_free_i32(lo);
  1940. tcg_temp_free_i32(hi);
  1941. }
  1942. static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
  1943. {
  1944. TCGv_i32 lo = tcg_temp_new_i32();
  1945. TCGv_i32 hi = tcg_temp_new_i32();
  1946. tcg_gen_mulu2_i32(lo, hi, rn, rm);
  1947. tcg_gen_concat_i32_i64(rd, lo, hi);
  1948. tcg_temp_free_i32(lo);
  1949. tcg_temp_free_i32(hi);
  1950. }
  1951. static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
  1952. {
  1953. static NeonGenTwoOpWidenFn * const opfn[] = {
  1954. gen_helper_neon_mull_s8,
  1955. gen_helper_neon_mull_s16,
  1956. gen_mull_s32,
  1957. NULL,
  1958. };
  1959. return do_long_3d(s, a, opfn[a->size], NULL);
  1960. }
  1961. static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
  1962. {
  1963. static NeonGenTwoOpWidenFn * const opfn[] = {
  1964. gen_helper_neon_mull_u8,
  1965. gen_helper_neon_mull_u16,
  1966. gen_mull_u32,
  1967. NULL,
  1968. };
  1969. return do_long_3d(s, a, opfn[a->size], NULL);
  1970. }
  1971. #define DO_VMLAL(INSN,MULL,ACC) \
  1972. static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
  1973. { \
  1974. static NeonGenTwoOpWidenFn * const opfn[] = { \
  1975. gen_helper_neon_##MULL##8, \
  1976. gen_helper_neon_##MULL##16, \
  1977. gen_##MULL##32, \
  1978. NULL, \
  1979. }; \
  1980. static NeonGenTwo64OpFn * const accfn[] = { \
  1981. gen_helper_neon_##ACC##l_u16, \
  1982. gen_helper_neon_##ACC##l_u32, \
  1983. tcg_gen_##ACC##_i64, \
  1984. NULL, \
  1985. }; \
  1986. return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
  1987. }
1988. DO_VMLAL(VMLAL_S, mull_s, add)
1989. DO_VMLAL(VMLAL_U, mull_u, add)
1990. DO_VMLAL(VMLSL_S, mull_s, sub)
1991. DO_VMLAL(VMLSL_U, mull_u, sub)
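/*
 * VQDMULL: the doubling of the widened product is implemented as a
 * saturating add of the product to itself, using the existing
 * addl_saturate helper to detect saturation of the doubling step.
 */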
  1992. static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
  1993. {
  1994. gen_helper_neon_mull_s16(rd, rn, rm);
  1995. gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
  1996. }
  1997. static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
  1998. {
  1999. gen_mull_s32(rd, rn, rm);
  2000. gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
  2001. }
  2002. static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
  2003. {
  2004. static NeonGenTwoOpWidenFn * const opfn[] = {
  2005. NULL,
  2006. gen_VQDMULL_16,
  2007. gen_VQDMULL_32,
  2008. NULL,
  2009. };
  2010. return do_long_3d(s, a, opfn[a->size], NULL);
  2011. }
  2012. static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
  2013. {
  2014. gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
  2015. }
  2016. static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
  2017. {
  2018. gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
  2019. }
  2020. static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
  2021. {
  2022. static NeonGenTwoOpWidenFn * const opfn[] = {
  2023. NULL,
  2024. gen_VQDMULL_16,
  2025. gen_VQDMULL_32,
  2026. NULL,
  2027. };
  2028. static NeonGenTwo64OpFn * const accfn[] = {
  2029. NULL,
  2030. gen_VQDMLAL_acc_16,
  2031. gen_VQDMLAL_acc_32,
  2032. NULL,
  2033. };
  2034. return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
  2035. }
  2036. static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
  2037. {
  2038. gen_helper_neon_negl_u32(rm, rm);
  2039. gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
  2040. }
  2041. static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
  2042. {
  2043. tcg_gen_neg_i64(rm, rm);
  2044. gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
  2045. }
  2046. static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
  2047. {
  2048. static NeonGenTwoOpWidenFn * const opfn[] = {
  2049. NULL,
  2050. gen_VQDMULL_16,
  2051. gen_VQDMULL_32,
  2052. NULL,
  2053. };
  2054. static NeonGenTwo64OpFn * const accfn[] = {
  2055. NULL,
  2056. gen_VQDMLSL_acc_16,
  2057. gen_VQDMLSL_acc_32,
  2058. NULL,
  2059. };
  2060. return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
  2061. }
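/*
 * Polynomial VMULL (VMULL.P8, and VMULL.P64 when the pmull extension
 * is present) is handled as a single out-of-line gvec helper over the
 * whole vector rather than via do_long_3d.
 */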
  2062. static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
  2063. {
  2064. gen_helper_gvec_3 *fn_gvec;
  2065. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2066. return false;
  2067. }
  2068. /* UNDEF accesses to D16-D31 if they don't exist. */
  2069. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2070. ((a->vd | a->vn | a->vm) & 0x10)) {
  2071. return false;
  2072. }
  2073. if (a->vd & 1) {
  2074. return false;
  2075. }
  2076. switch (a->size) {
  2077. case 0:
  2078. fn_gvec = gen_helper_neon_pmull_h;
  2079. break;
  2080. case 2:
  2081. if (!dc_isar_feature(aa32_pmull, s)) {
  2082. return false;
  2083. }
  2084. fn_gvec = gen_helper_gvec_pmull_q;
  2085. break;
  2086. default:
  2087. return false;
  2088. }
  2089. if (!vfp_access_check(s)) {
  2090. return true;
  2091. }
  2092. tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
  2093. neon_reg_offset(a->vn, 0),
  2094. neon_reg_offset(a->vm, 0),
  2095. 16, 16, 0, fn_gvec);
  2096. return true;
  2097. }
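/*
 * Duplicate one 16-bit half of a 32-bit value into both halves:
 * used to broadcast a 16-bit scalar across a pair of lanes.
 */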
  2098. static void gen_neon_dup_low16(TCGv_i32 var)
  2099. {
  2100. TCGv_i32 tmp = tcg_temp_new_i32();
  2101. tcg_gen_ext16u_i32(var, var);
  2102. tcg_gen_shli_i32(tmp, var, 16);
  2103. tcg_gen_or_i32(var, var, tmp);
  2104. tcg_temp_free_i32(tmp);
  2105. }
  2106. static void gen_neon_dup_high16(TCGv_i32 var)
  2107. {
  2108. TCGv_i32 tmp = tcg_temp_new_i32();
  2109. tcg_gen_andi_i32(var, var, 0xffff0000);
  2110. tcg_gen_shri_i32(tmp, var, 16);
  2111. tcg_gen_or_i32(var, var, tmp);
  2112. tcg_temp_free_i32(tmp);
  2113. }
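/*
 * Get the scalar operand for a *-by-scalar operation: the low bits of
 * 'reg' select the D register and the remaining bits the element.
 * For 16-bit scalars the selected element is duplicated into both
 * halves of the returned 32-bit value.
 */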
  2114. static inline TCGv_i32 neon_get_scalar(int size, int reg)
  2115. {
  2116. TCGv_i32 tmp;
  2117. if (size == 1) {
  2118. tmp = neon_load_reg(reg & 7, reg >> 4);
  2119. if (reg & 8) {
  2120. gen_neon_dup_high16(tmp);
  2121. } else {
  2122. gen_neon_dup_low16(tmp);
  2123. }
  2124. } else {
  2125. tmp = neon_load_reg(reg & 15, reg >> 4);
  2126. }
  2127. return tmp;
  2128. }
  2129. static bool do_2scalar(DisasContext *s, arg_2scalar *a,
  2130. NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
  2131. {
  2132. /*
  2133. * Two registers and a scalar: perform an operation between
  2134. * the input elements and the scalar, and then possibly
  2135. * perform an accumulation operation of that result into the
  2136. * destination.
  2137. */
  2138. TCGv_i32 scalar;
  2139. int pass;
  2140. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2141. return false;
  2142. }
  2143. /* UNDEF accesses to D16-D31 if they don't exist. */
  2144. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2145. ((a->vd | a->vn | a->vm) & 0x10)) {
  2146. return false;
  2147. }
  2148. if (!opfn) {
  2149. /* Bad size (including size == 3, which is a different insn group) */
  2150. return false;
  2151. }
  2152. if (a->q && ((a->vd | a->vn) & 1)) {
  2153. return false;
  2154. }
  2155. if (!vfp_access_check(s)) {
  2156. return true;
  2157. }
  2158. scalar = neon_get_scalar(a->size, a->vm);
  2159. for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
  2160. TCGv_i32 tmp = neon_load_reg(a->vn, pass);
  2161. opfn(tmp, tmp, scalar);
  2162. if (accfn) {
  2163. TCGv_i32 rd = neon_load_reg(a->vd, pass);
  2164. accfn(tmp, rd, tmp);
  2165. tcg_temp_free_i32(rd);
  2166. }
  2167. neon_store_reg(a->vd, pass, tmp);
  2168. }
  2169. tcg_temp_free_i32(scalar);
  2170. return true;
  2171. }
  2172. static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
  2173. {
  2174. static NeonGenTwoOpFn * const opfn[] = {
  2175. NULL,
  2176. gen_helper_neon_mul_u16,
  2177. tcg_gen_mul_i32,
  2178. NULL,
  2179. };
  2180. return do_2scalar(s, a, opfn[a->size], NULL);
  2181. }
  2182. static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
  2183. {
  2184. static NeonGenTwoOpFn * const opfn[] = {
  2185. NULL,
  2186. gen_helper_neon_mul_u16,
  2187. tcg_gen_mul_i32,
  2188. NULL,
  2189. };
  2190. static NeonGenTwoOpFn * const accfn[] = {
  2191. NULL,
  2192. gen_helper_neon_add_u16,
  2193. tcg_gen_add_i32,
  2194. NULL,
  2195. };
  2196. return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
  2197. }
  2198. static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
  2199. {
  2200. static NeonGenTwoOpFn * const opfn[] = {
  2201. NULL,
  2202. gen_helper_neon_mul_u16,
  2203. tcg_gen_mul_i32,
  2204. NULL,
  2205. };
  2206. static NeonGenTwoOpFn * const accfn[] = {
  2207. NULL,
  2208. gen_helper_neon_sub_u16,
  2209. tcg_gen_sub_i32,
  2210. NULL,
  2211. };
  2212. return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
  2213. }
  2214. /*
  2215. * Rather than have a float-specific version of do_2scalar just for
  2216. * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into
  2217. * a NeonGenTwoOpFn.
  2218. */
  2219. #define WRAP_FP_FN(WRAPNAME, FUNC) \
  2220. static void WRAPNAME(TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \
  2221. { \
  2222. TCGv_ptr fpstatus = get_fpstatus_ptr(1); \
  2223. FUNC(rd, rn, rm, fpstatus); \
  2224. tcg_temp_free_ptr(fpstatus); \
  2225. }
  2226. WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
  2227. WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
  2228. WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
  2229. static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a)
  2230. {
  2231. static NeonGenTwoOpFn * const opfn[] = {
  2232. NULL,
  2233. NULL, /* TODO: fp16 support */
  2234. gen_VMUL_F_mul,
  2235. NULL,
  2236. };
  2237. return do_2scalar(s, a, opfn[a->size], NULL);
  2238. }
  2239. static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a)
  2240. {
  2241. static NeonGenTwoOpFn * const opfn[] = {
  2242. NULL,
  2243. NULL, /* TODO: fp16 support */
  2244. gen_VMUL_F_mul,
  2245. NULL,
  2246. };
  2247. static NeonGenTwoOpFn * const accfn[] = {
  2248. NULL,
  2249. NULL, /* TODO: fp16 support */
  2250. gen_VMUL_F_add,
  2251. NULL,
  2252. };
  2253. return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
  2254. }
  2255. static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
  2256. {
  2257. static NeonGenTwoOpFn * const opfn[] = {
  2258. NULL,
  2259. NULL, /* TODO: fp16 support */
  2260. gen_VMUL_F_mul,
  2261. NULL,
  2262. };
  2263. static NeonGenTwoOpFn * const accfn[] = {
  2264. NULL,
  2265. NULL, /* TODO: fp16 support */
  2266. gen_VMUL_F_sub,
  2267. NULL,
  2268. };
  2269. return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
  2270. }
  2271. WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
  2272. WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
  2273. WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
  2274. WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
  2275. static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
  2276. {
  2277. static NeonGenTwoOpFn * const opfn[] = {
  2278. NULL,
  2279. gen_VQDMULH_16,
  2280. gen_VQDMULH_32,
  2281. NULL,
  2282. };
  2283. return do_2scalar(s, a, opfn[a->size], NULL);
  2284. }
  2285. static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
  2286. {
  2287. static NeonGenTwoOpFn * const opfn[] = {
  2288. NULL,
  2289. gen_VQRDMULH_16,
  2290. gen_VQRDMULH_32,
  2291. NULL,
  2292. };
  2293. return do_2scalar(s, a, opfn[a->size], NULL);
  2294. }
  2295. static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
  2296. NeonGenThreeOpEnvFn *opfn)
  2297. {
  2298. /*
  2299. * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
  2300. * performs a kind of fused op-then-accumulate using a helper
  2301. * function that takes all of rd, rn and the scalar at once.
  2302. */
  2303. TCGv_i32 scalar;
  2304. int pass;
  2305. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2306. return false;
  2307. }
  2308. if (!dc_isar_feature(aa32_rdm, s)) {
  2309. return false;
  2310. }
  2311. /* UNDEF accesses to D16-D31 if they don't exist. */
  2312. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2313. ((a->vd | a->vn | a->vm) & 0x10)) {
  2314. return false;
  2315. }
  2316. if (!opfn) {
  2317. /* Bad size (including size == 3, which is a different insn group) */
  2318. return false;
  2319. }
  2320. if (a->q && ((a->vd | a->vn) & 1)) {
  2321. return false;
  2322. }
  2323. if (!vfp_access_check(s)) {
  2324. return true;
  2325. }
  2326. scalar = neon_get_scalar(a->size, a->vm);
  2327. for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
  2328. TCGv_i32 rn = neon_load_reg(a->vn, pass);
  2329. TCGv_i32 rd = neon_load_reg(a->vd, pass);
  2330. opfn(rd, cpu_env, rn, scalar, rd);
  2331. tcg_temp_free_i32(rn);
  2332. neon_store_reg(a->vd, pass, rd);
  2333. }
  2334. tcg_temp_free_i32(scalar);
  2335. return true;
  2336. }
  2337. static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
  2338. {
  2339. static NeonGenThreeOpEnvFn *opfn[] = {
  2340. NULL,
  2341. gen_helper_neon_qrdmlah_s16,
  2342. gen_helper_neon_qrdmlah_s32,
  2343. NULL,
  2344. };
  2345. return do_vqrdmlah_2sc(s, a, opfn[a->size]);
  2346. }
  2347. static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
  2348. {
  2349. static NeonGenThreeOpEnvFn *opfn[] = {
  2350. NULL,
  2351. gen_helper_neon_qrdmlsh_s16,
  2352. gen_helper_neon_qrdmlsh_s32,
  2353. NULL,
  2354. };
  2355. return do_vqrdmlah_2sc(s, a, opfn[a->size]);
  2356. }
  2357. static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
  2358. NeonGenTwoOpWidenFn *opfn,
  2359. NeonGenTwo64OpFn *accfn)
  2360. {
  2361. /*
  2362. * Two registers and a scalar, long operations: perform an
  2363. * operation on the input elements and the scalar which produces
  2364. * a double-width result, and then possibly perform an accumulation
  2365. * operation of that result into the destination.
  2366. */
  2367. TCGv_i32 scalar, rn;
  2368. TCGv_i64 rn0_64, rn1_64;
  2369. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2370. return false;
  2371. }
  2372. /* UNDEF accesses to D16-D31 if they don't exist. */
  2373. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2374. ((a->vd | a->vn | a->vm) & 0x10)) {
  2375. return false;
  2376. }
  2377. if (!opfn) {
  2378. /* Bad size (including size == 3, which is a different insn group) */
  2379. return false;
  2380. }
  2381. if (a->vd & 1) {
  2382. return false;
  2383. }
  2384. if (!vfp_access_check(s)) {
  2385. return true;
  2386. }
  2387. scalar = neon_get_scalar(a->size, a->vm);
  2388. /* Load all inputs before writing any outputs, in case of overlap */
  2389. rn = neon_load_reg(a->vn, 0);
  2390. rn0_64 = tcg_temp_new_i64();
  2391. opfn(rn0_64, rn, scalar);
  2392. tcg_temp_free_i32(rn);
  2393. rn = neon_load_reg(a->vn, 1);
  2394. rn1_64 = tcg_temp_new_i64();
  2395. opfn(rn1_64, rn, scalar);
  2396. tcg_temp_free_i32(rn);
  2397. tcg_temp_free_i32(scalar);
  2398. if (accfn) {
  2399. TCGv_i64 t64 = tcg_temp_new_i64();
  2400. neon_load_reg64(t64, a->vd);
  2401. accfn(t64, t64, rn0_64);
  2402. neon_store_reg64(t64, a->vd);
  2403. neon_load_reg64(t64, a->vd + 1);
  2404. accfn(t64, t64, rn1_64);
  2405. neon_store_reg64(t64, a->vd + 1);
  2406. tcg_temp_free_i64(t64);
  2407. } else {
  2408. neon_store_reg64(rn0_64, a->vd);
  2409. neon_store_reg64(rn1_64, a->vd + 1);
  2410. }
  2411. tcg_temp_free_i64(rn0_64);
  2412. tcg_temp_free_i64(rn1_64);
  2413. return true;
  2414. }
  2415. static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
  2416. {
  2417. static NeonGenTwoOpWidenFn * const opfn[] = {
  2418. NULL,
  2419. gen_helper_neon_mull_s16,
  2420. gen_mull_s32,
  2421. NULL,
  2422. };
  2423. return do_2scalar_long(s, a, opfn[a->size], NULL);
  2424. }
  2425. static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
  2426. {
  2427. static NeonGenTwoOpWidenFn * const opfn[] = {
  2428. NULL,
  2429. gen_helper_neon_mull_u16,
  2430. gen_mull_u32,
  2431. NULL,
  2432. };
  2433. return do_2scalar_long(s, a, opfn[a->size], NULL);
  2434. }
  2435. #define DO_VMLAL_2SC(INSN, MULL, ACC) \
  2436. static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
  2437. { \
  2438. static NeonGenTwoOpWidenFn * const opfn[] = { \
  2439. NULL, \
  2440. gen_helper_neon_##MULL##16, \
  2441. gen_##MULL##32, \
  2442. NULL, \
  2443. }; \
  2444. static NeonGenTwo64OpFn * const accfn[] = { \
  2445. NULL, \
  2446. gen_helper_neon_##ACC##l_u32, \
  2447. tcg_gen_##ACC##_i64, \
  2448. NULL, \
  2449. }; \
  2450. return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
  2451. }
  2452. DO_VMLAL_2SC(VMLAL_S, mull_s, add)
  2453. DO_VMLAL_2SC(VMLAL_U, mull_u, add)
  2454. DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
  2455. DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
  2456. static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
  2457. {
  2458. static NeonGenTwoOpWidenFn * const opfn[] = {
  2459. NULL,
  2460. gen_VQDMULL_16,
  2461. gen_VQDMULL_32,
  2462. NULL,
  2463. };
  2464. return do_2scalar_long(s, a, opfn[a->size], NULL);
  2465. }
  2466. static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
  2467. {
  2468. static NeonGenTwoOpWidenFn * const opfn[] = {
  2469. NULL,
  2470. gen_VQDMULL_16,
  2471. gen_VQDMULL_32,
  2472. NULL,
  2473. };
  2474. static NeonGenTwo64OpFn * const accfn[] = {
  2475. NULL,
  2476. gen_VQDMLAL_acc_16,
  2477. gen_VQDMLAL_acc_32,
  2478. NULL,
  2479. };
  2480. return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
  2481. }
  2482. static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
  2483. {
  2484. static NeonGenTwoOpWidenFn * const opfn[] = {
  2485. NULL,
  2486. gen_VQDMULL_16,
  2487. gen_VQDMULL_32,
  2488. NULL,
  2489. };
  2490. static NeonGenTwo64OpFn * const accfn[] = {
  2491. NULL,
  2492. gen_VQDMLSL_acc_16,
  2493. gen_VQDMLSL_acc_32,
  2494. NULL,
  2495. };
  2496. return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
  2497. }
  2498. static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
  2499. {
  2500. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2501. return false;
  2502. }
  2503. /* UNDEF accesses to D16-D31 if they don't exist. */
  2504. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2505. ((a->vd | a->vn | a->vm) & 0x10)) {
  2506. return false;
  2507. }
  2508. if ((a->vn | a->vm | a->vd) & a->q) {
  2509. return false;
  2510. }
  2511. if (a->imm > 7 && !a->q) {
  2512. return false;
  2513. }
  2514. if (!vfp_access_check(s)) {
  2515. return true;
  2516. }
  2517. if (!a->q) {
  2518. /* Extract 64 bits from <Vm:Vn> */
  2519. TCGv_i64 left, right, dest;
  2520. left = tcg_temp_new_i64();
  2521. right = tcg_temp_new_i64();
  2522. dest = tcg_temp_new_i64();
  2523. neon_load_reg64(right, a->vn);
  2524. neon_load_reg64(left, a->vm);
  2525. tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
  2526. neon_store_reg64(dest, a->vd);
  2527. tcg_temp_free_i64(left);
  2528. tcg_temp_free_i64(right);
  2529. tcg_temp_free_i64(dest);
  2530. } else {
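/*
 * Build the 128-bit result with two extract2 operations across the
 * three consecutive 64-bit chunks that contain the wanted bytes.
 */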
  2531. /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
  2532. TCGv_i64 left, middle, right, destleft, destright;
  2533. left = tcg_temp_new_i64();
  2534. middle = tcg_temp_new_i64();
  2535. right = tcg_temp_new_i64();
  2536. destleft = tcg_temp_new_i64();
  2537. destright = tcg_temp_new_i64();
  2538. if (a->imm < 8) {
  2539. neon_load_reg64(right, a->vn);
  2540. neon_load_reg64(middle, a->vn + 1);
  2541. tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
  2542. neon_load_reg64(left, a->vm);
  2543. tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
  2544. } else {
  2545. neon_load_reg64(right, a->vn + 1);
  2546. neon_load_reg64(middle, a->vm);
  2547. tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
  2548. neon_load_reg64(left, a->vm + 1);
  2549. tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
  2550. }
  2551. neon_store_reg64(destright, a->vd);
  2552. neon_store_reg64(destleft, a->vd + 1);
  2553. tcg_temp_free_i64(destright);
  2554. tcg_temp_free_i64(destleft);
  2555. tcg_temp_free_i64(right);
  2556. tcg_temp_free_i64(middle);
  2557. tcg_temp_free_i64(left);
  2558. }
  2559. return true;
  2560. }
  2561. static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
  2562. {
  2563. int n;
  2564. TCGv_i32 tmp, tmp2, tmp3, tmp4;
  2565. TCGv_ptr ptr1;
  2566. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2567. return false;
  2568. }
  2569. /* UNDEF accesses to D16-D31 if they don't exist. */
  2570. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2571. ((a->vd | a->vn | a->vm) & 0x10)) {
  2572. return false;
  2573. }
  2574. if (!vfp_access_check(s)) {
  2575. return true;
  2576. }
  2577. n = a->len + 1;
  2578. if ((a->vn + n) > 32) {
  2579. /*
  2580. * This is UNPREDICTABLE; we choose to UNDEF to avoid the
  2581. * helper function running off the end of the register file.
  2582. */
  2583. return false;
  2584. }
  2585. n <<= 3;
  2586. if (a->op) {
  2587. tmp = neon_load_reg(a->vd, 0);
  2588. } else {
  2589. tmp = tcg_temp_new_i32();
  2590. tcg_gen_movi_i32(tmp, 0);
  2591. }
  2592. tmp2 = neon_load_reg(a->vm, 0);
  2593. ptr1 = vfp_reg_ptr(true, a->vn);
  2594. tmp4 = tcg_const_i32(n);
  2595. gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
  2596. tcg_temp_free_i32(tmp);
  2597. if (a->op) {
  2598. tmp = neon_load_reg(a->vd, 1);
  2599. } else {
  2600. tmp = tcg_temp_new_i32();
  2601. tcg_gen_movi_i32(tmp, 0);
  2602. }
  2603. tmp3 = neon_load_reg(a->vm, 1);
  2604. gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
  2605. tcg_temp_free_i32(tmp4);
  2606. tcg_temp_free_ptr(ptr1);
  2607. neon_store_reg(a->vd, 0, tmp2);
  2608. neon_store_reg(a->vd, 1, tmp3);
  2609. tcg_temp_free_i32(tmp);
  2610. return true;
  2611. }
  2612. static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
  2613. {
  2614. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2615. return false;
  2616. }
  2617. /* UNDEF accesses to D16-D31 if they don't exist. */
  2618. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2619. ((a->vd | a->vm) & 0x10)) {
  2620. return false;
  2621. }
  2622. if (a->vd & a->q) {
  2623. return false;
  2624. }
  2625. if (!vfp_access_check(s)) {
  2626. return true;
  2627. }
  2628. tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
  2629. neon_element_offset(a->vm, a->index, a->size),
  2630. a->q ? 16 : 8, a->q ? 16 : 8);
  2631. return true;
  2632. }
  2633. static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
  2634. {
  2635. int pass, half;
  2636. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2637. return false;
  2638. }
  2639. /* UNDEF accesses to D16-D31 if they don't exist. */
  2640. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2641. ((a->vd | a->vm) & 0x10)) {
  2642. return false;
  2643. }
  2644. if ((a->vd | a->vm) & a->q) {
  2645. return false;
  2646. }
  2647. if (a->size == 3) {
  2648. return false;
  2649. }
  2650. if (!vfp_access_check(s)) {
  2651. return true;
  2652. }
  2653. for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
  2654. TCGv_i32 tmp[2];
  2655. for (half = 0; half < 2; half++) {
  2656. tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
  2657. switch (a->size) {
  2658. case 0:
  2659. tcg_gen_bswap32_i32(tmp[half], tmp[half]);
  2660. break;
  2661. case 1:
  2662. gen_swap_half(tmp[half], tmp[half]);
  2663. break;
  2664. case 2:
  2665. break;
  2666. default:
  2667. g_assert_not_reached();
  2668. }
  2669. }
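/* Swap the two words to complete the reversal of the 64-bit element */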
  2670. neon_store_reg(a->vd, pass * 2, tmp[1]);
  2671. neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);
  2672. }
  2673. return true;
  2674. }
  2675. static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
  2676. NeonGenWidenFn *widenfn,
  2677. NeonGenTwo64OpFn *opfn,
  2678. NeonGenTwo64OpFn *accfn)
  2679. {
  2680. /*
  2681. * Pairwise long operations: widen both halves of the pair,
  2682. * combine the pairs with the opfn, and then possibly accumulate
  2683. * into the destination with the accfn.
  2684. */
  2685. int pass;
  2686. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2687. return false;
  2688. }
  2689. /* UNDEF accesses to D16-D31 if they don't exist. */
  2690. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2691. ((a->vd | a->vm) & 0x10)) {
  2692. return false;
  2693. }
  2694. if ((a->vd | a->vm) & a->q) {
  2695. return false;
  2696. }
  2697. if (!widenfn) {
  2698. return false;
  2699. }
  2700. if (!vfp_access_check(s)) {
  2701. return true;
  2702. }
  2703. for (pass = 0; pass < a->q + 1; pass++) {
  2704. TCGv_i32 tmp;
  2705. TCGv_i64 rm0_64, rm1_64, rd_64;
  2706. rm0_64 = tcg_temp_new_i64();
  2707. rm1_64 = tcg_temp_new_i64();
  2708. rd_64 = tcg_temp_new_i64();
  2709. tmp = neon_load_reg(a->vm, pass * 2);
  2710. widenfn(rm0_64, tmp);
  2711. tcg_temp_free_i32(tmp);
  2712. tmp = neon_load_reg(a->vm, pass * 2 + 1);
  2713. widenfn(rm1_64, tmp);
  2714. tcg_temp_free_i32(tmp);
  2715. opfn(rd_64, rm0_64, rm1_64);
  2716. tcg_temp_free_i64(rm0_64);
  2717. tcg_temp_free_i64(rm1_64);
  2718. if (accfn) {
  2719. TCGv_i64 tmp64 = tcg_temp_new_i64();
  2720. neon_load_reg64(tmp64, a->vd + pass);
  2721. accfn(rd_64, tmp64, rd_64);
  2722. tcg_temp_free_i64(tmp64);
  2723. }
  2724. neon_store_reg64(rd_64, a->vd + pass);
  2725. tcg_temp_free_i64(rd_64);
  2726. }
  2727. return true;
  2728. }
  2729. static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
  2730. {
  2731. static NeonGenWidenFn * const widenfn[] = {
  2732. gen_helper_neon_widen_s8,
  2733. gen_helper_neon_widen_s16,
  2734. tcg_gen_ext_i32_i64,
  2735. NULL,
  2736. };
  2737. static NeonGenTwo64OpFn * const opfn[] = {
  2738. gen_helper_neon_paddl_u16,
  2739. gen_helper_neon_paddl_u32,
  2740. tcg_gen_add_i64,
  2741. NULL,
  2742. };
  2743. return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
  2744. }
  2745. static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
  2746. {
  2747. static NeonGenWidenFn * const widenfn[] = {
  2748. gen_helper_neon_widen_u8,
  2749. gen_helper_neon_widen_u16,
  2750. tcg_gen_extu_i32_i64,
  2751. NULL,
  2752. };
  2753. static NeonGenTwo64OpFn * const opfn[] = {
  2754. gen_helper_neon_paddl_u16,
  2755. gen_helper_neon_paddl_u32,
  2756. tcg_gen_add_i64,
  2757. NULL,
  2758. };
  2759. return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
  2760. }
  2761. static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
  2762. {
  2763. static NeonGenWidenFn * const widenfn[] = {
  2764. gen_helper_neon_widen_s8,
  2765. gen_helper_neon_widen_s16,
  2766. tcg_gen_ext_i32_i64,
  2767. NULL,
  2768. };
  2769. static NeonGenTwo64OpFn * const opfn[] = {
  2770. gen_helper_neon_paddl_u16,
  2771. gen_helper_neon_paddl_u32,
  2772. tcg_gen_add_i64,
  2773. NULL,
  2774. };
  2775. static NeonGenTwo64OpFn * const accfn[] = {
  2776. gen_helper_neon_addl_u16,
  2777. gen_helper_neon_addl_u32,
  2778. tcg_gen_add_i64,
  2779. NULL,
  2780. };
  2781. return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
  2782. accfn[a->size]);
  2783. }
  2784. static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
  2785. {
  2786. static NeonGenWidenFn * const widenfn[] = {
  2787. gen_helper_neon_widen_u8,
  2788. gen_helper_neon_widen_u16,
  2789. tcg_gen_extu_i32_i64,
  2790. NULL,
  2791. };
  2792. static NeonGenTwo64OpFn * const opfn[] = {
  2793. gen_helper_neon_paddl_u16,
  2794. gen_helper_neon_paddl_u32,
  2795. tcg_gen_add_i64,
  2796. NULL,
  2797. };
  2798. static NeonGenTwo64OpFn * const accfn[] = {
  2799. gen_helper_neon_addl_u16,
  2800. gen_helper_neon_addl_u32,
  2801. tcg_gen_add_i64,
  2802. NULL,
  2803. };
  2804. return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
  2805. accfn[a->size]);
  2806. }
  2807. typedef void ZipFn(TCGv_ptr, TCGv_ptr);
  2808. static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
  2809. ZipFn *fn)
  2810. {
  2811. TCGv_ptr pd, pm;
  2812. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2813. return false;
  2814. }
  2815. /* UNDEF accesses to D16-D31 if they don't exist. */
  2816. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2817. ((a->vd | a->vm) & 0x10)) {
  2818. return false;
  2819. }
  2820. if ((a->vd | a->vm) & a->q) {
  2821. return false;
  2822. }
  2823. if (!fn) {
  2824. /* Bad size or size/q combination */
  2825. return false;
  2826. }
  2827. if (!vfp_access_check(s)) {
  2828. return true;
  2829. }
  2830. pd = vfp_reg_ptr(true, a->vd);
  2831. pm = vfp_reg_ptr(true, a->vm);
  2832. fn(pd, pm);
  2833. tcg_temp_free_ptr(pd);
  2834. tcg_temp_free_ptr(pm);
  2835. return true;
  2836. }
  2837. static bool trans_VUZP(DisasContext *s, arg_2misc *a)
  2838. {
  2839. static ZipFn * const fn[2][4] = {
  2840. {
  2841. gen_helper_neon_unzip8,
  2842. gen_helper_neon_unzip16,
  2843. NULL,
  2844. NULL,
  2845. }, {
  2846. gen_helper_neon_qunzip8,
  2847. gen_helper_neon_qunzip16,
  2848. gen_helper_neon_qunzip32,
  2849. NULL,
  2850. }
  2851. };
  2852. return do_zip_uzp(s, a, fn[a->q][a->size]);
  2853. }
  2854. static bool trans_VZIP(DisasContext *s, arg_2misc *a)
  2855. {
  2856. static ZipFn * const fn[2][4] = {
  2857. {
  2858. gen_helper_neon_zip8,
  2859. gen_helper_neon_zip16,
  2860. NULL,
  2861. NULL,
  2862. }, {
  2863. gen_helper_neon_qzip8,
  2864. gen_helper_neon_qzip16,
  2865. gen_helper_neon_qzip32,
  2866. NULL,
  2867. }
  2868. };
  2869. return do_zip_uzp(s, a, fn[a->q][a->size]);
  2870. }
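/*
 * Narrowing 2-reg-misc operations (VMOVN, VQMOVN, VQMOVUN): narrow
 * each 64-bit half of the Qm source into one 32-bit half of Dd.
 */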
  2871. static bool do_vmovn(DisasContext *s, arg_2misc *a,
  2872. NeonGenNarrowEnvFn *narrowfn)
  2873. {
  2874. TCGv_i64 rm;
  2875. TCGv_i32 rd0, rd1;
  2876. if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
  2877. return false;
  2878. }
  2879. /* UNDEF accesses to D16-D31 if they don't exist. */
  2880. if (!dc_isar_feature(aa32_simd_r32, s) &&
  2881. ((a->vd | a->vm) & 0x10)) {
  2882. return false;
  2883. }
  2884. if (a->vm & 1) {
  2885. return false;
  2886. }
  2887. if (!narrowfn) {
  2888. return false;
  2889. }
  2890. if (!vfp_access_check(s)) {
  2891. return true;
  2892. }
  2893. rm = tcg_temp_new_i64();
  2894. rd0 = tcg_temp_new_i32();
  2895. rd1 = tcg_temp_new_i32();
  2896. neon_load_reg64(rm, a->vm);
  2897. narrowfn(rd0, cpu_env, rm);
  2898. neon_load_reg64(rm, a->vm + 1);
  2899. narrowfn(rd1, cpu_env, rm);
  2900. neon_store_reg(a->vd, 0, rd0);
  2901. neon_store_reg(a->vd, 1, rd1);
  2902. tcg_temp_free_i64(rm);
  2903. return true;
  2904. }
  2905. #define DO_VMOVN(INSN, FUNC) \
  2906. static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
  2907. { \
  2908. static NeonGenNarrowEnvFn * const narrowfn[] = { \
  2909. FUNC##8, \
  2910. FUNC##16, \
  2911. FUNC##32, \
  2912. NULL, \
  2913. }; \
  2914. return do_vmovn(s, a, narrowfn[a->size]); \
  2915. }
  2916. DO_VMOVN(VMOVN, gen_neon_narrow_u)
  2917. DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
  2918. DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
  2919. DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
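/*
 * VSHLL with shift == element size: only the zero-extending widen
 * helpers are used here, because shifting the whole 64-bit value left
 * by the element size moves each element into the top half of its
 * widened lane, and the zero extension bits mean nothing leaks into
 * the neighbouring lane; the result is therefore the same for the
 * signed and unsigned forms.
 */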
static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 rm0, rm1;
    TCGv_i64 rd;
    static NeonGenWidenFn * const widenfns[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
        NULL,
    };
    NeonGenWidenFn *widenfn = widenfns[a->size];

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vd & 1) {
        return false;
    }

    if (!widenfn) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rd = tcg_temp_new_i64();
    rm0 = neon_load_reg(a->vm, 0);
    rm1 = neon_load_reg(a->vm, 1);

    widenfn(rd, rm0);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    neon_store_reg64(rd, a->vd);
    widenfn(rd, rm1);
    tcg_gen_shli_i64(rd, rd, 8 << a->size);
    neon_store_reg64(rd, a->vd + 1);

    tcg_temp_free_i64(rd);
    tcg_temp_free_i32(rm0);
    tcg_temp_free_i32(rm1);
    return true;
}
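
/*
 * VCVT between half-precision and single-precision (vector forms).
 * The F32->F16 direction reads four f32 values from the Q-sized source
 * and packs the four f16 results two-per-32-bit-word into the D
 * destination; the AHP flag selects IEEE versus alternative
 * half-precision format.
 */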
static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(true);
    ahp = get_ahp_flag();
    tmp = neon_load_reg(a->vm, 0);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp2 = neon_load_reg(a->vm, 1);
    gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
    tcg_gen_shli_i32(tmp2, tmp2, 16);
    tcg_gen_or_i32(tmp2, tmp2, tmp);
    tcg_temp_free_i32(tmp);
    tmp = neon_load_reg(a->vm, 2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
    tmp3 = neon_load_reg(a->vm, 3);
    neon_store_reg(a->vd, 0, tmp2);
    gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
    tcg_gen_shli_i32(tmp3, tmp3, 16);
    tcg_gen_or_i32(tmp3, tmp3, tmp);
    neon_store_reg(a->vd, 1, tmp3);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd & 1) || (a->size != 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(true);
    ahp = get_ahp_flag();
    tmp3 = tcg_temp_new_i32();
    tmp = neon_load_reg(a->vm, 0);
    tmp2 = neon_load_reg(a->vm, 1);
    tcg_gen_ext16u_i32(tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    neon_store_reg(a->vd, 0, tmp3);
    tcg_gen_shri_i32(tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
    neon_store_reg(a->vd, 1, tmp);
    tmp3 = tcg_temp_new_i32();
    tcg_gen_ext16u_i32(tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
    neon_store_reg(a->vd, 2, tmp3);
    tcg_gen_shri_i32(tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
    neon_store_reg(a->vd, 3, tmp2);
    tcg_temp_free_i32(ahp);
    tcg_temp_free_ptr(fpst);
    return true;
}
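
/*
 * Plain two-register-misc vector operations that can be expanded inline
 * with the generic gvec machinery: the GVecGen2Fn is handed the element
 * size and register-file offsets and emits the whole 64- or 128-bit
 * operation at once.
 */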
static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_reg_offset(a->vd, 0);
    int rm_ofs = neon_reg_offset(a->vm, 0);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
    return true;
}

#define DO_2MISC_VEC(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_2misc_vec(s, a, FN);                          \
    }

DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
DO_2MISC_VEC(VCLT0, gen_gvec_clt0)

static bool trans_VMVN(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc_vec(s, a, tcg_gen_gvec_not);
}
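
/*
 * The crypto two-register-misc operations use out-of-line helpers via
 * tcg_gen_gvec_*_ool(); the DATA immediate distinguishes the paired
 * variants that share a helper (AESE vs AESD, AESMC vs AESIMC).
 */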
#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
                           DATA, FUNC);                                 \
    }

#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rm_ofs, uint32_t oprsz,               \
                         uint32_t maxsz)                                \
    {                                                                   \
        tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
    }

WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)

#define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
            return false;                                       \
        }                                                       \
        return do_2misc_vec(s, a, gen_##INSN);                  \
    }

DO_2M_CRYPTO(AESE, aa32_aes, 0)
DO_2M_CRYPTO(AESD, aa32_aes, 0)
DO_2M_CRYPTO(AESMC, aa32_aes, 0)
DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
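
/*
 * Generic two-register-misc expander for operations done 32 bits at a
 * time: two passes for a D register, four for a Q register, with a NULL
 * table entry (bad size) rejected as UNDEF.
 */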
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
    int pass;

    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!fn) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
        fn(tmp, tmp);
        neon_store_reg(a->vd, pass, tmp);
    }
    return true;
}

static bool trans_VREV32(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        tcg_gen_bswap32_i32,
        gen_swap_half,
        NULL,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VREV16(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc(s, a, gen_rev16);
}

static bool trans_VCLS(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_cls_s8,
        gen_helper_neon_cls_s16,
        gen_helper_neon_cls_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
{
    tcg_gen_clzi_i32(rd, rm, 32);
}

static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_helper_neon_clz_u8,
        gen_helper_neon_clz_u16,
        do_VCLZ_32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VCNT(DisasContext *s, arg_2misc *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_2misc(s, a, gen_helper_neon_cnt_u8);
}

static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    /* TODO: FP16 : size == 1 */
    return do_2misc(s, a, gen_helper_vfp_abss);
}

static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    /* TODO: FP16 : size == 1 */
    return do_2misc(s, a, gen_helper_vfp_negs);
}

static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    return do_2misc(s, a, gen_helper_recpe_u32);
}

static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
{
    if (a->size != 2) {
        return false;
    }
    return do_2misc(s, a, gen_helper_rsqrte_u32);
}
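
/*
 * The saturating qabs/qneg helpers take cpu_env (so they can update the
 * QC saturation flag); wrap them to the two-operand NeonGenOneOpFn shape
 * that do_2misc() expects.
 */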
#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC)                 \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
    {                                                   \
        FUNC(d, cpu_env, m);                            \
    }

WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)

static bool trans_VQABS(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_VQABS_s8,
        gen_VQABS_s16,
        gen_VQABS_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}

static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
{
    static NeonGenOneOpFn * const fn[] = {
        gen_VQNEG_s8,
        gen_VQNEG_s16,
        gen_VQNEG_s32,
        NULL,
    };
    return do_2misc(s, a, fn[a->size]);
}
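
/*
 * Like do_2misc(), but for single-precision FP operations which also
 * need a float_status pointer; only size == 2 (f32) is handled here,
 * with FP16 left as a TODO.
 */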
static bool do_2misc_fp(DisasContext *s, arg_2misc *a,
                        NeonGenOneSingleOpFn *fn)
{
    int pass;
    TCGv_ptr fpst;

    /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size != 2) {
        /* TODO: FP16 will be the size == 1 case */
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(1);
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
        fn(tmp, tmp, fpst);
        neon_store_reg(a->vd, pass, tmp);
    }
    tcg_temp_free_ptr(fpst);
    return true;
}

#define DO_2MISC_FP(INSN, FUNC)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_2misc_fp(s, a, FUNC);                         \
    }

DO_2MISC_FP(VRECPE_F, gen_helper_recpe_f32)
DO_2MISC_FP(VRSQRTE_F, gen_helper_rsqrte_f32)
DO_2MISC_FP(VCVT_FS, gen_helper_vfp_sitos)
DO_2MISC_FP(VCVT_FU, gen_helper_vfp_uitos)
DO_2MISC_FP(VCVT_SF, gen_helper_vfp_tosizs)
DO_2MISC_FP(VCVT_UF, gen_helper_vfp_touizs)

static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }
    return do_2misc_fp(s, a, gen_helper_rints_exact);
}
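
/*
 * FP compare-against-zero is built from the ordinary two-operand compare
 * helpers with a constant zero operand; the REV variants swap the operand
 * order so that VCLE0/VCLT0 can reuse the cge/cgt helpers.
 */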
#define WRAP_FP_CMP0_FWD(WRAPNAME, FUNC)                        \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \
    {                                                           \
        TCGv_i32 zero = tcg_const_i32(0);                       \
        FUNC(d, m, zero, fpst);                                 \
        tcg_temp_free_i32(zero);                                \
    }

#define WRAP_FP_CMP0_REV(WRAPNAME, FUNC)                        \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 m, TCGv_ptr fpst) \
    {                                                           \
        TCGv_i32 zero = tcg_const_i32(0);                       \
        FUNC(d, zero, m, fpst);                                 \
        tcg_temp_free_i32(zero);                                \
    }

#define DO_FP_CMP0(INSN, FUNC, REV)                             \
    WRAP_FP_CMP0_##REV(gen_##INSN, FUNC)                        \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_2misc_fp(s, a, gen_##INSN);                   \
    }

DO_FP_CMP0(VCGT0_F, gen_helper_neon_cgt_f32, FWD)
DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD)
DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD)
DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV)
DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV)
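
/*
 * VRINT with an explicit rounding mode: the requested mode is swapped
 * into the FP status used by the Neon helpers around the per-pass
 * gen_helper_rints() calls and then restored (the helper hands back the
 * previous mode, so calling it a second time undoes the first).
 */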
static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode)
{
    /*
     * Handle a VRINT* operation by iterating 32 bits at a time,
     * with a specified rounding mode in operation.
     */
    int pass;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size != 2) {
        /* TODO: FP16 will be the size == 1 case */
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(1);
    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
    gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
        gen_helper_rints(tmp, tmp, fpst);
        neon_store_reg(a->vd, pass, tmp);
    }
    gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_ptr(fpst);
    return true;
}

#define DO_VRINT(INSN, RMODE)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_vrint(s, a, RMODE);                           \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)
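
/*
 * VCVT to integer with an explicit rounding mode: the same rounding-mode
 * swap/restore dance as do_vrint(), converting each 32-bit lane with a
 * zero fixed-point shift, signed or unsigned as requested.
 */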
static bool do_vcvt(DisasContext *s, arg_2misc *a, int rmode, bool is_signed)
{
    /*
     * Handle a VCVT* operation by iterating 32 bits at a time,
     * with a specified rounding mode in operation.
     */
    int pass;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode, tcg_shift;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size != 2) {
        /* TODO: FP16 will be the size == 1 case */
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(1);
    tcg_shift = tcg_const_i32(0);
    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
    gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 tmp = neon_load_reg(a->vm, pass);
        if (is_signed) {
            gen_helper_vfp_tosls(tmp, tmp, tcg_shift, fpst);
        } else {
            gen_helper_vfp_touls(tmp, tmp, tcg_shift, fpst);
        }
        neon_store_reg(a->vd, pass, tmp);
    }
    gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode, cpu_env);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tcg_shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

#define DO_VCVT(INSN, RMODE, SIGNED)                            \
    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_vcvt(s, a, RMODE, SIGNED);                    \
    }

DO_VCVT(VCVTAU, FPROUNDING_TIEAWAY, false)
DO_VCVT(VCVTAS, FPROUNDING_TIEAWAY, true)
DO_VCVT(VCVTNU, FPROUNDING_TIEEVEN, false)
DO_VCVT(VCVTNS, FPROUNDING_TIEEVEN, true)
DO_VCVT(VCVTPU, FPROUNDING_POSINF, false)
DO_VCVT(VCVTPS, FPROUNDING_POSINF, true)
DO_VCVT(VCVTMU, FPROUNDING_NEGINF, false)
DO_VCVT(VCVTMS, FPROUNDING_NEGINF, true)
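
/*
 * VSWP simply exchanges the 64-bit contents of Vd and Vm: one 64-bit
 * pass for the D form, two for the Q form.
 */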
static bool trans_VSWP(DisasContext *s, arg_2misc *a)
{
    TCGv_i64 rm, rd;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->size != 0) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    rm = tcg_temp_new_i64();
    rd = tcg_temp_new_i64();
    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
        neon_load_reg64(rm, a->vm + pass);
        neon_load_reg64(rd, a->vd + pass);
        neon_store_reg64(rm, a->vd + pass);
        neon_store_reg64(rd, a->vm + pass);
    }
    tcg_temp_free_i64(rm);
    tcg_temp_free_i64(rd);
    return true;
}
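
/*
 * Element transpose helpers used by trans_VTRN below: they operate in
 * place on one 32-bit lane from each source, swapping element pairs so
 * that the caller ends up with the architectural VTRN result
 * (byte elements for _u8, halfword elements for _u16).
 */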
static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 rd, tmp;

    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 8);
    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
    tcg_gen_or_i32(rd, rd, tmp);

    tcg_gen_shri_i32(t1, t1, 8);
    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);
}

static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 rd, tmp;

    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    tcg_gen_shli_i32(rd, t0, 16);
    tcg_gen_andi_i32(tmp, t1, 0xffff);
    tcg_gen_or_i32(rd, rd, tmp);
    tcg_gen_shri_i32(t1, t1, 16);
    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);
}

static bool trans_VTRN(DisasContext *s, arg_2misc *a)
{
    TCGv_i32 tmp, tmp2;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vd | a->vm) & a->q) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->size == 2) {
        for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
            tmp = neon_load_reg(a->vm, pass);
            tmp2 = neon_load_reg(a->vd, pass + 1);
            neon_store_reg(a->vm, pass, tmp2);
            neon_store_reg(a->vd, pass + 1, tmp);
        }
    } else {
        for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
            tmp = neon_load_reg(a->vm, pass);
            tmp2 = neon_load_reg(a->vd, pass);
            if (a->size == 0) {
                gen_neon_trn_u8(tmp, tmp2);
            } else {
                gen_neon_trn_u16(tmp, tmp2);
            }
            neon_store_reg(a->vm, pass, tmp2);
            neon_store_reg(a->vd, pass, tmp);
        }
    }
    return true;
}