//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Analysis/Analyses/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/TargetParser.h"
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static
int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
  return std::min(High, std::max(Low, Value));
}

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                     unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.getName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}
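
// For example, with BuiltinID == BI__builtin_fabsf and no explicit asm label,
// getName() returns "__builtin_fabsf"; the "+ 10" above skips the ten
// characters of the "__builtin_" prefix, so the declaration looked up (or
// created) is the plain library function "fabsf".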

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}
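
// EmitToInt/EmitFromInt let the atomic helpers below work uniformly on
// integers: pointer-typed operands are round-tripped through ptrtoint and
// inttoptr, so a single integer-typed atomicrmw or cmpxchg can be emitted
// whether the source-level operation is on an integer or on a pointer.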

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
                                    llvm::AtomicRMWInst::BinOp Kind,
                                    const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  return EmitFromInt(CGF, Result, T, ValueType);
}
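
// A minimal sketch of the lowering this helper produces, assuming a call such
// as __sync_fetch_and_add(&x, 1) on an 'int' x mapped to BinOp 'Add':
//
//   %old = atomicrmw add i32* %x, i32 1 seq_cst
//
// The old value (%old) is converted back to the source-level type through
// EmitFromInt before being returned to the caller.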

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}
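
// A rough sketch of the IR the nontemporal builtins lower to, assuming a
// 4-byte 'int' access:
//
//   __builtin_nontemporal_store(v, p)  ->  store i32 %v, i32* %p, !nontemporal !N
//   __builtin_nontemporal_load(p)      ->  %r = load i32, i32* %p, !nontemporal !N
//
// where !N is the usual !{i32 1} nontemporal marker attached via the LValue's
// nontemporal flag.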

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                     llvm::ConstantInt::get(IntType, -1));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
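
// A minimal sketch of how this differs from MakeBinaryAtomicValue, assuming
// __sync_add_and_fetch(&x, n): the atomicrmw still yields the *old* value, so
// the helper re-applies Op to recover the post-operation result:
//
//   %old = atomicrmw add i32* %x, i32 %n seq_cst
//   %new = add i32 %old, %n          ; value handed back to the caller
//
// The Invert path serves __sync_nand_and_fetch: after the 'and' step the
// result is xor'ed with -1 to produce the bitwise-negated value.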

/// @brief Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///                   cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}
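
// A rough sketch of the two forms this helper covers, assuming 'int' operands:
//
//   __sync_bool_compare_and_swap(p, old, new)
//     -> %pair = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
//        %ok   = extractvalue { i32, i1 } %pair, 1   ; zext'ed to the result type
//
//   __sync_val_compare_and_swap(p, old, new)
//     ->         extractvalue { i32, i1 } %pair, 0   ; the prior value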

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0);
}
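
// For instance, a builtin that maps directly onto an intrinsic, such as
// __builtin_ceil(x) on a 'double', would be emitted through this helper as a
// call to @llvm.ceil.f64(double %x). The binary and ternary variants below
// follow the same overload-on-argument-type pattern.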

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer operand.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}
  243. /// Emit the computation of the sign bit for a floating point value. Returns
  244. /// the i1 sign bit value.
  245. static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  246. LLVMContext &C = CGF.CGM.getLLVMContext();
  247. llvm::Type *Ty = V->getType();
  248. int Width = Ty->getPrimitiveSizeInBits();
  249. llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  250. V = CGF.Builder.CreateBitCast(V, IntTy);
  251. if (Ty->isPPC_FP128Ty()) {
  252. // We want the sign bit of the higher-order double. The bitcast we just
  253. // did works as if the double-double was stored to memory and then
  254. // read as an i128. The "store" will put the higher-order double in the
  255. // lower address in both little- and big-Endian modes, but the "load"
  256. // will treat those bits as a different part of the i128: the low bits in
  257. // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
  258. // we need to shift the high bits down to the low before truncating.
  259. Width >>= 1;
  260. if (CGF.getTarget().isBigEndian()) {
  261. Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
  262. V = CGF.Builder.CreateLShr(V, ShiftCst);
  263. }
  264. // We are truncating value in order to extract the higher-order
  265. // double, which we will be using to extract the sign from.
  266. IntTy = llvm::IntegerType::get(C, Width);
  267. V = CGF.Builder.CreateTrunc(V, IntTy);
  268. }
  269. Value *Zero = llvm::Constant::getNullValue(IntTy);
  270. return CGF.Builder.CreateICmpSLT(V, Zero);
  271. }
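// Illustrative sketch (not part of this file): what the bitcast-and-compare
// above computes for an ordinary IEEE-754 double (the ppc_fp128 narrowing is
// skipped here). Assumes a 64-bit double; the helper name is hypothetical.
#if 0
#include <cstdint>
#include <cstring>
static bool exampleSignBit(double d) {
  std::uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);          // the "bitcast" to an integer
  return static_cast<std::int64_t>(bits) < 0;   // icmp slt 0: sign bit set
}
// Unlike (d < 0.0), this reports the sign of -0.0 and of negative NaNs.
#endif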
  272. static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
  273. const CallExpr *E, llvm::Constant *calleeValue) {
  274. CGCallee callee = CGCallee::forDirect(calleeValue, FD);
  275. return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
  276. }
  277. /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
  278. /// depending on IntrinsicID.
  279. ///
  280. /// \arg CGF The current codegen function.
  281. /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
  282. /// \arg X The first argument to the llvm.*.with.overflow.*.
  283. /// \arg Y The second argument to the llvm.*.with.overflow.*.
  284. /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
  285. /// \returns The result (i.e. sum/product) returned by the intrinsic.
  286. static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
  287. const llvm::Intrinsic::ID IntrinsicID,
  288. llvm::Value *X, llvm::Value *Y,
  289. llvm::Value *&Carry) {
  290. // Make sure we have integers of the same width.
  291. assert(X->getType() == Y->getType() &&
  292. "Arguments must be the same type. (Did you forget to make sure both "
  293. "arguments have the same integer width?)");
  294. llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  295. llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  296. Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  297. return CGF.Builder.CreateExtractValue(Tmp, 0);
  298. }
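// Illustrative sketch (not part of this file): one of the checked-arithmetic
// builtins that reaches EmitOverflowIntrinsic. Names are hypothetical; the
// returned int is element 0 of the {result, overflow} pair and the bool is
// the extracted carry (element 1).
#if 0
static bool exampleCheckedAdd(int a, int b, int *sum) {
  // Lowers to @llvm.sadd.with.overflow.i32 on typical 32-bit int targets.
  return __builtin_sadd_overflow(a, b, sum);
}
#endif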
  299. static Value *emitRangedBuiltin(CodeGenFunction &CGF,
  300. unsigned IntrinsicID,
  301. int low, int high) {
  302. llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  303. llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  304. Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  305. llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  306. Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  307. return Call;
  308. }
  309. namespace {
  310. struct WidthAndSignedness {
  311. unsigned Width;
  312. bool Signed;
  313. };
  314. }
  315. static WidthAndSignedness
  316. getIntegerWidthAndSignedness(const clang::ASTContext &context,
  317. const clang::QualType Type) {
  318. assert(Type->isIntegerType() && "Given type is not an integer.");
  319. unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
  320. bool Signed = Type->isSignedIntegerType();
  321. return {Width, Signed};
  322. }
  323. // Given one or more integer types, this function produces an integer type that
  324. // encompasses them: any value in one of the given types could be expressed in
  325. // the encompassing type.
  326. static struct WidthAndSignedness
  327. EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  328. assert(Types.size() > 0 && "Empty list of types.");
  329. // If any of the given types is signed, we must return a signed type.
  330. bool Signed = false;
  331. for (const auto &Type : Types) {
  332. Signed |= Type.Signed;
  333. }
  334. // The encompassing type must have a width greater than or equal to the width
  335. // of the specified types. Additionally, if the encompassing type is signed,
  336. // its width must be strictly greater than the width of any unsigned types
  337. // given.
  338. unsigned Width = 0;
  339. for (const auto &Type : Types) {
  340. unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
  341. if (Width < MinWidth) {
  342. Width = MinWidth;
  343. }
  344. }
  345. return {Width, Signed};
  346. }
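// Worked example (annotation, not from the original source): for the pair
// {unsigned, 32 bits} and {signed, 16 bits}, the result must be signed, and a
// signed type needs 32 + 1 bits to represent every unsigned 32-bit value, so
// the encompassing type is {Width = 33, Signed = true}.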
  347. Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  348. llvm::Type *DestType = Int8PtrTy;
  349. if (ArgValue->getType() != DestType)
  350. ArgValue =
  351. Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
  352. Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  353. return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
  354. }
  355. /// Checks if using the result of __builtin_object_size(p, @p From) in place of
  356. /// __builtin_object_size(p, @p To) is correct
  357. static bool areBOSTypesCompatible(int From, int To) {
  358. // Note: Our __builtin_object_size implementation currently treats Type=0 and
  359. // Type=2 identically. Encoding this implementation detail here may make
  360. // improving __builtin_object_size difficult in the future, so it's omitted.
  361. return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
  362. }
  363. static llvm::Value *
  364. getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  365. return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
  366. }
  367. llvm::Value *
  368. CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
  369. llvm::IntegerType *ResType,
  370. llvm::Value *EmittedE) {
  371. uint64_t ObjectSize;
  372. if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
  373. return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
  374. return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
  375. }
  376. /// Returns a Value corresponding to the size of the given expression.
  377. /// This Value may be either of the following:
  378. /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
  379. /// it)
  380. /// - A call to the @llvm.objectsize intrinsic
  381. ///
  382. /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
  383. /// and we wouldn't otherwise try to reference a pass_object_size parameter,
  384. /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
  385. llvm::Value *
  386. CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
  387. llvm::IntegerType *ResType,
  388. llvm::Value *EmittedE) {
  389. // We need to reference an argument if the pointer is a parameter with the
  390. // pass_object_size attribute.
  391. if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
  392. auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
  393. auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
  394. if (Param != nullptr && PS != nullptr &&
  395. areBOSTypesCompatible(PS->getType(), Type)) {
  396. auto Iter = SizeArguments.find(Param);
  397. assert(Iter != SizeArguments.end());
  398. const ImplicitParamDecl *D = Iter->second;
  399. auto DIter = LocalDeclMap.find(D);
  400. assert(DIter != LocalDeclMap.end());
  401. return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
  402. getContext().getSizeType(), E->getLocStart());
  403. }
  404. }
  405. // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
  406. // evaluate E for side-effects. In either case, we shouldn't lower to
  407. // @llvm.objectsize.
  408. if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
  409. return getDefaultBuiltinObjectSizeResult(Type, ResType);
  410. Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
  411. assert(Ptr->getType()->isPointerTy() &&
  412. "Non-pointer passed to __builtin_object_size?");
  413. Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
  414. // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
  415. Value *Min = Builder.getInt1((Type & 2) != 0);
416. // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
  417. Value *NullIsUnknown = Builder.getTrue();
  418. return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
  419. }
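// Illustrative sketch (not part of this file): the two paths handled above as
// they appear in source. Function and variable names are hypothetical.
#if 0
#include <cstddef>
// Path 1: a plain pointer lowers to a call to @llvm.objectsize.
static std::size_t exampleBOS(char *p) {
  return __builtin_object_size(p, 0);   // (size_t)-1 here if the size is unknown
}
// Path 2: a parameter carrying pass_object_size makes the caller pass the size
// as a hidden argument, which the code above loads instead of emitting the
// intrinsic.
static std::size_t examplePOS(void *p __attribute__((pass_object_size(0)))) {
  return __builtin_object_size(p, 0);
}
#endif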
420. // Many MSVC builtins are available on both x64 and ARM; to avoid repeating code, we
  421. // handle them here.
  422. enum class CodeGenFunction::MSVCIntrin {
  423. _BitScanForward,
  424. _BitScanReverse,
  425. _InterlockedAnd,
  426. _InterlockedDecrement,
  427. _InterlockedExchange,
  428. _InterlockedExchangeAdd,
  429. _InterlockedExchangeSub,
  430. _InterlockedIncrement,
  431. _InterlockedOr,
  432. _InterlockedXor,
  433. _interlockedbittestandset,
  434. __fastfail,
  435. };
  436. Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
  437. const CallExpr *E) {
  438. switch (BuiltinID) {
  439. case MSVCIntrin::_BitScanForward:
  440. case MSVCIntrin::_BitScanReverse: {
  441. Value *ArgValue = EmitScalarExpr(E->getArg(1));
  442. llvm::Type *ArgType = ArgValue->getType();
  443. llvm::Type *IndexType =
  444. EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
  445. llvm::Type *ResultType = ConvertType(E->getType());
  446. Value *ArgZero = llvm::Constant::getNullValue(ArgType);
  447. Value *ResZero = llvm::Constant::getNullValue(ResultType);
  448. Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
  449. BasicBlock *Begin = Builder.GetInsertBlock();
  450. BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
  451. Builder.SetInsertPoint(End);
  452. PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
  453. Builder.SetInsertPoint(Begin);
  454. Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
  455. BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
  456. Builder.CreateCondBr(IsZero, End, NotZero);
  457. Result->addIncoming(ResZero, Begin);
  458. Builder.SetInsertPoint(NotZero);
  459. Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
  460. if (BuiltinID == MSVCIntrin::_BitScanForward) {
  461. Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
  462. Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
  463. ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
  464. Builder.CreateStore(ZeroCount, IndexAddress, false);
  465. } else {
  466. unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
  467. Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
  468. Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  469. Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
  470. ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
  471. Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
  472. Builder.CreateStore(Index, IndexAddress, false);
  473. }
  474. Builder.CreateBr(End);
  475. Result->addIncoming(ResOne, NotZero);
  476. Builder.SetInsertPoint(End);
  477. return Result;
  478. }
  479. case MSVCIntrin::_InterlockedAnd:
  480. return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
  481. case MSVCIntrin::_InterlockedExchange:
  482. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
  483. case MSVCIntrin::_InterlockedExchangeAdd:
  484. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
  485. case MSVCIntrin::_InterlockedExchangeSub:
  486. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
  487. case MSVCIntrin::_InterlockedOr:
  488. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
  489. case MSVCIntrin::_InterlockedXor:
  490. return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
  491. case MSVCIntrin::_interlockedbittestandset: {
  492. llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
  493. llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
  494. AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
  495. AtomicRMWInst::Or, Addr,
  496. Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
  497. llvm::AtomicOrdering::SequentiallyConsistent);
  498. // Shift the relevant bit to the least significant position, truncate to
  499. // the result type, and test the low bit.
  500. llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
  501. llvm::Value *Truncated =
  502. Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
  503. return Builder.CreateAnd(Truncated,
  504. ConstantInt::get(Truncated->getType(), 1));
  505. }
  506. case MSVCIntrin::_InterlockedDecrement: {
  507. llvm::Type *IntTy = ConvertType(E->getType());
  508. AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
  509. AtomicRMWInst::Sub,
  510. EmitScalarExpr(E->getArg(0)),
  511. ConstantInt::get(IntTy, 1),
  512. llvm::AtomicOrdering::SequentiallyConsistent);
  513. return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
  514. }
  515. case MSVCIntrin::_InterlockedIncrement: {
  516. llvm::Type *IntTy = ConvertType(E->getType());
  517. AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
  518. AtomicRMWInst::Add,
  519. EmitScalarExpr(E->getArg(0)),
  520. ConstantInt::get(IntTy, 1),
  521. llvm::AtomicOrdering::SequentiallyConsistent);
  522. return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
  523. }
  524. case MSVCIntrin::__fastfail: {
  525. // Request immediate process termination from the kernel. The instruction
  526. // sequences to do this are documented on MSDN:
  527. // https://msdn.microsoft.com/en-us/library/dn774154.aspx
  528. llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
  529. StringRef Asm, Constraints;
  530. switch (ISA) {
  531. default:
  532. ErrorUnsupported(E, "__fastfail call for this architecture");
  533. break;
  534. case llvm::Triple::x86:
  535. case llvm::Triple::x86_64:
  536. Asm = "int $$0x29";
  537. Constraints = "{cx}";
  538. break;
  539. case llvm::Triple::thumb:
  540. Asm = "udf #251";
  541. Constraints = "{r0}";
  542. break;
  543. }
  544. llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
  545. llvm::InlineAsm *IA =
  546. llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
  547. llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
  548. getLLVMContext(), llvm::AttributeList::FunctionIndex,
  549. llvm::Attribute::NoReturn);
  550. CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
  551. CS.setAttributes(NoReturnAttr);
  552. return CS.getInstruction();
  553. }
  554. }
  555. llvm_unreachable("Incorrect MSVC intrinsic!");
  556. }
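// Illustrative sketch (not part of this file): two of the MSVC intrinsics
// handled above, as they appear in source. Names are hypothetical.
#if 0
static void exampleMSVC(long volatile *counter, unsigned long mask) {
  unsigned long index;
  // Returns 0 when mask is zero; otherwise stores the position of the lowest
  // set bit (cttz) into index and returns nonzero.
  if (_BitScanForward(&index, mask)) {
    // use index ...
  }
  // Atomic increment; like the lowering above, yields the *new* value
  // (the fetched value plus one).
  long updated = _InterlockedIncrement(counter);
  (void)updated;
}
#endif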
  557. namespace {
  558. // ARC cleanup for __builtin_os_log_format
  559. struct CallObjCArcUse final : EHScopeStack::Cleanup {
  560. CallObjCArcUse(llvm::Value *object) : object(object) {}
  561. llvm::Value *object;
  562. void Emit(CodeGenFunction &CGF, Flags flags) override {
  563. CGF.EmitARCIntrinsicUse(object);
  564. }
  565. };
  566. }
  567. Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
  568. BuiltinCheckKind Kind) {
  569. assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
  570. && "Unsupported builtin check kind");
  571. Value *ArgValue = EmitScalarExpr(E);
  572. if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
  573. return ArgValue;
  574. SanitizerScope SanScope(this);
  575. Value *Cond = Builder.CreateICmpNE(
  576. ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
  577. EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
  578. SanitizerHandler::InvalidBuiltin,
  579. {EmitCheckSourceLocation(E->getExprLoc()),
  580. llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
  581. None);
  582. return ArgValue;
  583. }
  584. /// Get the argument type for arguments to os_log_helper.
  585. static CanQualType getOSLogArgType(ASTContext &C, int Size) {
  586. QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
  587. return C.getCanonicalType(UnsignedTy);
  588. }
  589. llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
  590. const analyze_os_log::OSLogBufferLayout &Layout,
  591. CharUnits BufferAlignment) {
  592. ASTContext &Ctx = getContext();
  593. llvm::SmallString<64> Name;
  594. {
  595. raw_svector_ostream OS(Name);
  596. OS << "__os_log_helper";
  597. OS << "_" << BufferAlignment.getQuantity();
  598. OS << "_" << int(Layout.getSummaryByte());
  599. OS << "_" << int(Layout.getNumArgsByte());
  600. for (const auto &Item : Layout.Items)
  601. OS << "_" << int(Item.getSizeByte()) << "_"
  602. << int(Item.getDescriptorByte());
  603. }
  604. if (llvm::Function *F = CGM.getModule().getFunction(Name))
  605. return F;
  606. llvm::SmallVector<ImplicitParamDecl, 4> Params;
  607. Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
  608. Ctx.VoidPtrTy, ImplicitParamDecl::Other);
  609. for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
  610. char Size = Layout.Items[I].getSizeByte();
  611. if (!Size)
  612. continue;
  613. Params.emplace_back(
  614. Ctx, nullptr, SourceLocation(),
  615. &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
  616. getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
  617. }
  618. FunctionArgList Args;
  619. for (auto &P : Params)
  620. Args.push_back(&P);
  621. // The helper function has linkonce_odr linkage to enable the linker to merge
  622. // identical functions. To ensure the merging always happens, 'noinline' is
  623. // attached to the function when compiling with -Oz.
  624. const CGFunctionInfo &FI =
  625. CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  626. llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
  627. llvm::Function *Fn = llvm::Function::Create(
  628. FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
  629. Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
  630. CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
  631. CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
  632. // Attach 'noinline' at -Oz.
  633. if (CGM.getCodeGenOpts().OptimizeSize == 2)
  634. Fn->addFnAttr(llvm::Attribute::NoInline);
  635. auto NL = ApplyDebugLocation::CreateEmpty(*this);
  636. IdentifierInfo *II = &Ctx.Idents.get(Name);
  637. FunctionDecl *FD = FunctionDecl::Create(
  638. Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
  639. Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
  640. StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
  641. // Create a scope with an artificial location for the body of this function.
  642. auto AL = ApplyDebugLocation::CreateArtificial(*this);
  643. CharUnits Offset;
  644. Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
  645. BufferAlignment);
  646. Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
  647. Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
  648. Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
  649. Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
  650. unsigned I = 1;
  651. for (const auto &Item : Layout.Items) {
  652. Builder.CreateStore(
  653. Builder.getInt8(Item.getDescriptorByte()),
  654. Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
  655. Builder.CreateStore(
  656. Builder.getInt8(Item.getSizeByte()),
  657. Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
  658. CharUnits Size = Item.size();
  659. if (!Size.getQuantity())
  660. continue;
  661. Address Arg = GetAddrOfLocalVar(&Params[I]);
  662. Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
  663. Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
  664. "argDataCast");
  665. Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
  666. Offset += Size;
  667. ++I;
  668. }
  669. FinishFunction();
  670. return Fn;
  671. }
  672. RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
  673. assert(E.getNumArgs() >= 2 &&
  674. "__builtin_os_log_format takes at least 2 arguments");
  675. ASTContext &Ctx = getContext();
  676. analyze_os_log::OSLogBufferLayout Layout;
  677. analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
  678. Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
  679. llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
  680. // Ignore argument 1, the format string. It is not currently used.
  681. CallArgList Args;
  682. Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
  683. for (const auto &Item : Layout.Items) {
  684. int Size = Item.getSizeByte();
  685. if (!Size)
  686. continue;
  687. llvm::Value *ArgVal;
  688. if (const Expr *TheExpr = Item.getExpr()) {
  689. ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
  690. // Check if this is a retainable type.
  691. if (TheExpr->getType()->isObjCRetainableType()) {
  692. assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
  693. "Only scalar can be a ObjC retainable type");
  694. // Check if the object is constant, if not, save it in
  695. // RetainableOperands.
  696. if (!isa<Constant>(ArgVal))
  697. RetainableOperands.push_back(ArgVal);
  698. }
  699. } else {
  700. ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
  701. }
  702. unsigned ArgValSize =
  703. CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
  704. llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
  705. ArgValSize);
  706. ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
  707. CanQualType ArgTy = getOSLogArgType(Ctx, Size);
  708. // If ArgVal has type x86_fp80, zero-extend ArgVal.
  709. ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
  710. Args.add(RValue::get(ArgVal), ArgTy);
  711. }
  712. const CGFunctionInfo &FI =
  713. CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
  714. llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
  715. Layout, BufAddr.getAlignment());
  716. EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
  717. // Push a clang.arc.use cleanup for each object in RetainableOperands. The
  718. // cleanup will cause the use to appear after the final log call, keeping
719. // the object valid while it's held in the log buffer. Note that if there's
  720. // a release cleanup on the object, it will already be active; since
  721. // cleanups are emitted in reverse order, the use will occur before the
  722. // object is released.
  723. if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
  724. CGM.getCodeGenOpts().OptimizationLevel != 0)
  725. for (llvm::Value *Object : RetainableOperands)
  726. pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
  727. return RValue::get(BufAddr.getPointer());
  728. }
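// Illustrative sketch (not part of this file): how the os_log builtins pair up
// in source code. The format string and arguments are hypothetical;
// __builtin_os_log_format serializes the arguments into the caller-provided
// buffer (via the helper generated above) and returns that buffer.
#if 0
static void exampleOSLog(int err, const char *msg) {
  char buf[__builtin_os_log_format_buffer_size("error %d: %s", err, msg)];
  __builtin_os_log_format(buf, "error %d: %s", err, msg);
  // buf would then be handed to the logging runtime.
  (void)buf;
}
#endif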
  729. /// Determine if a binop is a checked mixed-sign multiply we can specialize.
  730. static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
  731. WidthAndSignedness Op1Info,
  732. WidthAndSignedness Op2Info,
  733. WidthAndSignedness ResultInfo) {
  734. return BuiltinID == Builtin::BI__builtin_mul_overflow &&
  735. Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
  736. Op1Info.Signed != Op2Info.Signed;
  737. }
  738. /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
  739. /// the generic checked-binop irgen.
  740. static RValue
  741. EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
  742. WidthAndSignedness Op1Info, const clang::Expr *Op2,
  743. WidthAndSignedness Op2Info,
  744. const clang::Expr *ResultArg, QualType ResultQTy,
  745. WidthAndSignedness ResultInfo) {
  746. assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
  747. Op2Info, ResultInfo) &&
  748. "Not a mixed-sign multipliction we can specialize");
  749. // Emit the signed and unsigned operands.
  750. const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
  751. const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
  752. llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
  753. llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
  754. llvm::Type *OpTy = Signed->getType();
  755. llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
  756. Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
  757. llvm::Type *ResTy = ResultPtr.getElementType();
  758. // Take the absolute value of the signed operand.
  759. llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
  760. llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
  761. llvm::Value *AbsSigned =
  762. CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
  763. // Perform a checked unsigned multiplication.
  764. llvm::Value *UnsignedOverflow;
  765. llvm::Value *UnsignedResult =
  766. EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
  767. Unsigned, UnsignedOverflow);
  768. llvm::Value *Overflow, *Result;
  769. if (ResultInfo.Signed) {
770. // Signed overflow occurs if the result is greater than INT_MAX or less
771. // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
  772. auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
  773. .zextOrSelf(Op1Info.Width);
  774. llvm::Value *MaxResult =
  775. CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
  776. CGF.Builder.CreateZExt(IsNegative, OpTy));
  777. llvm::Value *SignedOverflow =
  778. CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
  779. Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
  780. // Prepare the signed result (possibly by negating it).
  781. llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
  782. llvm::Value *SignedResult =
  783. CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
  784. Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
  785. } else {
  786. // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
  787. llvm::Value *Underflow = CGF.Builder.CreateAnd(
  788. IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
  789. Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
  790. if (ResultInfo.Width < Op1Info.Width) {
  791. auto IntMax =
  792. llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
  793. llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
  794. UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
  795. Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
  796. }
  797. // Negate the product if it would be negative in infinite precision.
  798. Result = CGF.Builder.CreateSelect(
  799. IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
  800. Result = CGF.Builder.CreateTrunc(Result, ResTy);
  801. }
  802. assert(Overflow && Result && "Missing overflow or result");
  803. bool isVolatile =
  804. ResultArg->getType()->getPointeeType().isVolatileQualified();
  805. CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
  806. isVolatile);
  807. return RValue::get(Overflow);
  808. }
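// Illustrative sketch (not part of this file): a mixed-sign multiply that
// takes the specialized path above. Names and values are hypothetical.
#if 0
static bool exampleMixedMul(int s, unsigned u, int *out) {
  // One operand signed, one unsigned, both as wide as the result: lowered via
  // a single umul.with.overflow on |s| plus the sign fix-up emitted above.
  return __builtin_mul_overflow(s, u, out);
  // e.g. s = -3, u = 7 stores -21 and returns false (no overflow).
}
#endif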
  809. static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
  810. Value *&RecordPtr, CharUnits Align, Value *Func,
  811. int Lvl) {
  812. const auto *RT = RType->getAs<RecordType>();
  813. ASTContext &Context = CGF.getContext();
  814. RecordDecl *RD = RT->getDecl()->getDefinition();
  815. ASTContext &Ctx = RD->getASTContext();
  816. const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD);
  817. std::string Pad = std::string(Lvl * 4, ' ');
  818. Value *GString =
  819. CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n");
  820. Value *Res = CGF.Builder.CreateCall(Func, {GString});
  821. static llvm::DenseMap<QualType, const char *> Types;
  822. if (Types.empty()) {
  823. Types[Context.CharTy] = "%c";
  824. Types[Context.BoolTy] = "%d";
  825. Types[Context.SignedCharTy] = "%hhd";
  826. Types[Context.UnsignedCharTy] = "%hhu";
  827. Types[Context.IntTy] = "%d";
  828. Types[Context.UnsignedIntTy] = "%u";
  829. Types[Context.LongTy] = "%ld";
  830. Types[Context.UnsignedLongTy] = "%lu";
  831. Types[Context.LongLongTy] = "%lld";
  832. Types[Context.UnsignedLongLongTy] = "%llu";
  833. Types[Context.ShortTy] = "%hd";
  834. Types[Context.UnsignedShortTy] = "%hu";
  835. Types[Context.VoidPtrTy] = "%p";
  836. Types[Context.FloatTy] = "%f";
  837. Types[Context.DoubleTy] = "%f";
  838. Types[Context.LongDoubleTy] = "%Lf";
  839. Types[Context.getPointerType(Context.CharTy)] = "%s";
  840. Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s";
  841. }
  842. for (const auto *FD : RD->fields()) {
  843. uint64_t Off = RL.getFieldOffset(FD->getFieldIndex());
  844. Off = Ctx.toCharUnitsFromBits(Off).getQuantity();
  845. Value *FieldPtr = RecordPtr;
  846. if (RD->isUnion())
  847. FieldPtr = CGF.Builder.CreatePointerCast(
  848. FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType())));
  849. else
  850. FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr,
  851. FD->getFieldIndex());
  852. GString = CGF.Builder.CreateGlobalStringPtr(
  853. llvm::Twine(Pad)
  854. .concat(FD->getType().getAsString())
  855. .concat(llvm::Twine(' '))
  856. .concat(FD->getNameAsString())
  857. .concat(" : ")
  858. .str());
  859. Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
  860. Res = CGF.Builder.CreateAdd(Res, TmpRes);
  861. QualType CanonicalType =
  862. FD->getType().getUnqualifiedType().getCanonicalType();
863. // If the field is itself a record type, recurse into it.
  864. if (CanonicalType->isRecordType()) {
  865. Value *TmpRes =
  866. dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
  867. Res = CGF.Builder.CreateAdd(TmpRes, Res);
  868. continue;
  869. }
870. // Try to determine the best printf format for the current field.
  871. llvm::Twine Format = Types.find(CanonicalType) == Types.end()
  872. ? Types[Context.VoidPtrTy]
  873. : Types[CanonicalType];
  874. Address FieldAddress = Address(FieldPtr, Align);
  875. FieldPtr = CGF.Builder.CreateLoad(FieldAddress);
  876. // FIXME Need to handle bitfield here
  877. GString = CGF.Builder.CreateGlobalStringPtr(
  878. Format.concat(llvm::Twine('\n')).str());
  879. TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr});
  880. Res = CGF.Builder.CreateAdd(Res, TmpRes);
  881. }
  882. GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n");
  883. Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
  884. Res = CGF.Builder.CreateAdd(Res, TmpRes);
  885. return Res;
  886. }
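// Illustrative sketch (not part of this file): the builtin that drives
// dumpRecord. The struct and the use of printf are hypothetical; the second
// argument is any printf-style function used to emit each formatted line.
#if 0
#include <cstdio>
struct ExamplePoint { int x; float y; };
static void exampleDump(void) {
  ExamplePoint p = {1, 2.5f};
  __builtin_dump_struct(&p, &printf);  // prints the type, then one line per field
}
#endif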
  887. RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
  888. unsigned BuiltinID, const CallExpr *E,
  889. ReturnValueSlot ReturnValue) {
  890. // See if we can constant fold this builtin. If so, don't emit it at all.
  891. Expr::EvalResult Result;
  892. if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
  893. !Result.hasSideEffects()) {
  894. if (Result.Val.isInt())
  895. return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
  896. Result.Val.getInt()));
  897. if (Result.Val.isFloat())
  898. return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
  899. Result.Val.getFloat()));
  900. }
901. // There are LLVM math intrinsics/instructions corresponding to math library
902. // functions, except that the LLVM op never sets errno while the math library
903. // function might. Also, math builtins have the same semantics as their math
904. // library twins. Thus, we can transform math library and builtin calls into
905. // their LLVM counterparts if the call is marked 'const' (known not to set errno).
  906. if (FD->hasAttr<ConstAttr>()) {
  907. switch (BuiltinID) {
  908. case Builtin::BIceil:
  909. case Builtin::BIceilf:
  910. case Builtin::BIceill:
  911. case Builtin::BI__builtin_ceil:
  912. case Builtin::BI__builtin_ceilf:
  913. case Builtin::BI__builtin_ceill:
  914. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
  915. case Builtin::BIcopysign:
  916. case Builtin::BIcopysignf:
  917. case Builtin::BIcopysignl:
  918. case Builtin::BI__builtin_copysign:
  919. case Builtin::BI__builtin_copysignf:
  920. case Builtin::BI__builtin_copysignl:
  921. case Builtin::BI__builtin_copysignf128:
  922. return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
  923. case Builtin::BIcos:
  924. case Builtin::BIcosf:
  925. case Builtin::BIcosl:
  926. case Builtin::BI__builtin_cos:
  927. case Builtin::BI__builtin_cosf:
  928. case Builtin::BI__builtin_cosl:
  929. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
  930. case Builtin::BIexp:
  931. case Builtin::BIexpf:
  932. case Builtin::BIexpl:
  933. case Builtin::BI__builtin_exp:
  934. case Builtin::BI__builtin_expf:
  935. case Builtin::BI__builtin_expl:
  936. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
  937. case Builtin::BIexp2:
  938. case Builtin::BIexp2f:
  939. case Builtin::BIexp2l:
  940. case Builtin::BI__builtin_exp2:
  941. case Builtin::BI__builtin_exp2f:
  942. case Builtin::BI__builtin_exp2l:
  943. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
  944. case Builtin::BIfabs:
  945. case Builtin::BIfabsf:
  946. case Builtin::BIfabsl:
  947. case Builtin::BI__builtin_fabs:
  948. case Builtin::BI__builtin_fabsf:
  949. case Builtin::BI__builtin_fabsl:
  950. case Builtin::BI__builtin_fabsf128:
  951. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
  952. case Builtin::BIfloor:
  953. case Builtin::BIfloorf:
  954. case Builtin::BIfloorl:
  955. case Builtin::BI__builtin_floor:
  956. case Builtin::BI__builtin_floorf:
  957. case Builtin::BI__builtin_floorl:
  958. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
  959. case Builtin::BIfma:
  960. case Builtin::BIfmaf:
  961. case Builtin::BIfmal:
  962. case Builtin::BI__builtin_fma:
  963. case Builtin::BI__builtin_fmaf:
  964. case Builtin::BI__builtin_fmal:
  965. return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
  966. case Builtin::BIfmax:
  967. case Builtin::BIfmaxf:
  968. case Builtin::BIfmaxl:
  969. case Builtin::BI__builtin_fmax:
  970. case Builtin::BI__builtin_fmaxf:
  971. case Builtin::BI__builtin_fmaxl:
  972. return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
  973. case Builtin::BIfmin:
  974. case Builtin::BIfminf:
  975. case Builtin::BIfminl:
  976. case Builtin::BI__builtin_fmin:
  977. case Builtin::BI__builtin_fminf:
  978. case Builtin::BI__builtin_fminl:
  979. return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
  980. // fmod() is a special-case. It maps to the frem instruction rather than an
  981. // LLVM intrinsic.
  982. case Builtin::BIfmod:
  983. case Builtin::BIfmodf:
  984. case Builtin::BIfmodl:
  985. case Builtin::BI__builtin_fmod:
  986. case Builtin::BI__builtin_fmodf:
  987. case Builtin::BI__builtin_fmodl: {
  988. Value *Arg1 = EmitScalarExpr(E->getArg(0));
  989. Value *Arg2 = EmitScalarExpr(E->getArg(1));
  990. return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
  991. }
  992. case Builtin::BIlog:
  993. case Builtin::BIlogf:
  994. case Builtin::BIlogl:
  995. case Builtin::BI__builtin_log:
  996. case Builtin::BI__builtin_logf:
  997. case Builtin::BI__builtin_logl:
  998. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
  999. case Builtin::BIlog10:
  1000. case Builtin::BIlog10f:
  1001. case Builtin::BIlog10l:
  1002. case Builtin::BI__builtin_log10:
  1003. case Builtin::BI__builtin_log10f:
  1004. case Builtin::BI__builtin_log10l:
  1005. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
  1006. case Builtin::BIlog2:
  1007. case Builtin::BIlog2f:
  1008. case Builtin::BIlog2l:
  1009. case Builtin::BI__builtin_log2:
  1010. case Builtin::BI__builtin_log2f:
  1011. case Builtin::BI__builtin_log2l:
  1012. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
  1013. case Builtin::BInearbyint:
  1014. case Builtin::BInearbyintf:
  1015. case Builtin::BInearbyintl:
  1016. case Builtin::BI__builtin_nearbyint:
  1017. case Builtin::BI__builtin_nearbyintf:
  1018. case Builtin::BI__builtin_nearbyintl:
  1019. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
  1020. case Builtin::BIpow:
  1021. case Builtin::BIpowf:
  1022. case Builtin::BIpowl:
  1023. case Builtin::BI__builtin_pow:
  1024. case Builtin::BI__builtin_powf:
  1025. case Builtin::BI__builtin_powl:
  1026. return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
  1027. case Builtin::BIrint:
  1028. case Builtin::BIrintf:
  1029. case Builtin::BIrintl:
  1030. case Builtin::BI__builtin_rint:
  1031. case Builtin::BI__builtin_rintf:
  1032. case Builtin::BI__builtin_rintl:
  1033. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
  1034. case Builtin::BIround:
  1035. case Builtin::BIroundf:
  1036. case Builtin::BIroundl:
  1037. case Builtin::BI__builtin_round:
  1038. case Builtin::BI__builtin_roundf:
  1039. case Builtin::BI__builtin_roundl:
  1040. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
  1041. case Builtin::BIsin:
  1042. case Builtin::BIsinf:
  1043. case Builtin::BIsinl:
  1044. case Builtin::BI__builtin_sin:
  1045. case Builtin::BI__builtin_sinf:
  1046. case Builtin::BI__builtin_sinl:
  1047. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
  1048. case Builtin::BIsqrt:
  1049. case Builtin::BIsqrtf:
  1050. case Builtin::BIsqrtl:
  1051. case Builtin::BI__builtin_sqrt:
  1052. case Builtin::BI__builtin_sqrtf:
  1053. case Builtin::BI__builtin_sqrtl:
  1054. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
  1055. case Builtin::BItrunc:
  1056. case Builtin::BItruncf:
  1057. case Builtin::BItruncl:
  1058. case Builtin::BI__builtin_trunc:
  1059. case Builtin::BI__builtin_truncf:
  1060. case Builtin::BI__builtin_truncl:
  1061. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
  1062. default:
  1063. break;
  1064. }
  1065. }
  1066. switch (BuiltinID) {
  1067. default: break;
  1068. case Builtin::BI__builtin___CFStringMakeConstantString:
  1069. case Builtin::BI__builtin___NSStringMakeConstantString:
  1070. return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
  1071. case Builtin::BI__builtin_stdarg_start:
  1072. case Builtin::BI__builtin_va_start:
  1073. case Builtin::BI__va_start:
  1074. case Builtin::BI__builtin_va_end:
  1075. return RValue::get(
  1076. EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
  1077. ? EmitScalarExpr(E->getArg(0))
  1078. : EmitVAListRef(E->getArg(0)).getPointer(),
  1079. BuiltinID != Builtin::BI__builtin_va_end));
  1080. case Builtin::BI__builtin_va_copy: {
  1081. Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
  1082. Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
  1083. llvm::Type *Type = Int8PtrTy;
  1084. DstPtr = Builder.CreateBitCast(DstPtr, Type);
  1085. SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
  1086. return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
  1087. {DstPtr, SrcPtr}));
  1088. }
  1089. case Builtin::BI__builtin_abs:
  1090. case Builtin::BI__builtin_labs:
  1091. case Builtin::BI__builtin_llabs: {
  1092. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  1093. Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
  1094. Value *CmpResult =
  1095. Builder.CreateICmpSGE(ArgValue,
  1096. llvm::Constant::getNullValue(ArgValue->getType()),
  1097. "abscond");
  1098. Value *Result =
  1099. Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
  1100. return RValue::get(Result);
  1101. }
  1102. case Builtin::BI__builtin_conj:
  1103. case Builtin::BI__builtin_conjf:
  1104. case Builtin::BI__builtin_conjl: {
  1105. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  1106. Value *Real = ComplexVal.first;
  1107. Value *Imag = ComplexVal.second;
  1108. Value *Zero =
  1109. Imag->getType()->isFPOrFPVectorTy()
  1110. ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
  1111. : llvm::Constant::getNullValue(Imag->getType());
  1112. Imag = Builder.CreateFSub(Zero, Imag, "sub");
  1113. return RValue::getComplex(std::make_pair(Real, Imag));
  1114. }
  1115. case Builtin::BI__builtin_creal:
  1116. case Builtin::BI__builtin_crealf:
  1117. case Builtin::BI__builtin_creall:
  1118. case Builtin::BIcreal:
  1119. case Builtin::BIcrealf:
  1120. case Builtin::BIcreall: {
  1121. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  1122. return RValue::get(ComplexVal.first);
  1123. }
  1124. case Builtin::BI__builtin_dump_struct: {
  1125. Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
  1126. CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
  1127. const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts();
  1128. QualType Arg0Type = Arg0->getType()->getPointeeType();
  1129. Value *RecordPtr = EmitScalarExpr(Arg0);
  1130. Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0);
  1131. return RValue::get(Res);
  1132. }
  1133. case Builtin::BI__builtin_cimag:
  1134. case Builtin::BI__builtin_cimagf:
  1135. case Builtin::BI__builtin_cimagl:
  1136. case Builtin::BIcimag:
  1137. case Builtin::BIcimagf:
  1138. case Builtin::BIcimagl: {
  1139. ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
  1140. return RValue::get(ComplexVal.second);
  1141. }
  1142. case Builtin::BI__builtin_ctzs:
  1143. case Builtin::BI__builtin_ctz:
  1144. case Builtin::BI__builtin_ctzl:
  1145. case Builtin::BI__builtin_ctzll: {
  1146. Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
  1147. llvm::Type *ArgType = ArgValue->getType();
  1148. Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
  1149. llvm::Type *ResultType = ConvertType(E->getType());
  1150. Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
  1151. Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
  1152. if (Result->getType() != ResultType)
  1153. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  1154. "cast");
  1155. return RValue::get(Result);
  1156. }
  1157. case Builtin::BI__builtin_clzs:
  1158. case Builtin::BI__builtin_clz:
  1159. case Builtin::BI__builtin_clzl:
  1160. case Builtin::BI__builtin_clzll: {
  1161. Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
  1162. llvm::Type *ArgType = ArgValue->getType();
  1163. Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
  1164. llvm::Type *ResultType = ConvertType(E->getType());
  1165. Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
  1166. Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
  1167. if (Result->getType() != ResultType)
  1168. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  1169. "cast");
  1170. return RValue::get(Result);
  1171. }
  1172. case Builtin::BI__builtin_ffs:
  1173. case Builtin::BI__builtin_ffsl:
  1174. case Builtin::BI__builtin_ffsll: {
  1175. // ffs(x) -> x ? cttz(x) + 1 : 0
  1176. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  1177. llvm::Type *ArgType = ArgValue->getType();
  1178. Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
  1179. llvm::Type *ResultType = ConvertType(E->getType());
  1180. Value *Tmp =
  1181. Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
  1182. llvm::ConstantInt::get(ArgType, 1));
  1183. Value *Zero = llvm::Constant::getNullValue(ArgType);
  1184. Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
  1185. Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
  1186. if (Result->getType() != ResultType)
  1187. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  1188. "cast");
  1189. return RValue::get(Result);
  1190. }
  1191. case Builtin::BI__builtin_parity:
  1192. case Builtin::BI__builtin_parityl:
  1193. case Builtin::BI__builtin_parityll: {
  1194. // parity(x) -> ctpop(x) & 1
  1195. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  1196. llvm::Type *ArgType = ArgValue->getType();
  1197. Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
  1198. llvm::Type *ResultType = ConvertType(E->getType());
  1199. Value *Tmp = Builder.CreateCall(F, ArgValue);
  1200. Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
  1201. if (Result->getType() != ResultType)
  1202. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  1203. "cast");
  1204. return RValue::get(Result);
  1205. }
  1206. case Builtin::BI__popcnt16:
  1207. case Builtin::BI__popcnt:
  1208. case Builtin::BI__popcnt64:
  1209. case Builtin::BI__builtin_popcount:
  1210. case Builtin::BI__builtin_popcountl:
  1211. case Builtin::BI__builtin_popcountll: {
  1212. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  1213. llvm::Type *ArgType = ArgValue->getType();
  1214. Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
  1215. llvm::Type *ResultType = ConvertType(E->getType());
  1216. Value *Result = Builder.CreateCall(F, ArgValue);
  1217. if (Result->getType() != ResultType)
  1218. Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
  1219. "cast");
  1220. return RValue::get(Result);
  1221. }
  1222. case Builtin::BI_rotr8:
  1223. case Builtin::BI_rotr16:
  1224. case Builtin::BI_rotr:
  1225. case Builtin::BI_lrotr:
  1226. case Builtin::BI_rotr64: {
  1227. Value *Val = EmitScalarExpr(E->getArg(0));
  1228. Value *Shift = EmitScalarExpr(E->getArg(1));
  1229. llvm::Type *ArgType = Val->getType();
  1230. Shift = Builder.CreateIntCast(Shift, ArgType, false);
  1231. unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
  1232. Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
  1233. Value *ArgZero = llvm::Constant::getNullValue(ArgType);
  1234. Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
  1235. Shift = Builder.CreateAnd(Shift, Mask);
  1236. Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
  1237. Value *RightShifted = Builder.CreateLShr(Val, Shift);
  1238. Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
  1239. Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
  1240. Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
  1241. Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
  1242. return RValue::get(Result);
  1243. }
  1244. case Builtin::BI_rotl8:
  1245. case Builtin::BI_rotl16:
  1246. case Builtin::BI_rotl:
  1247. case Builtin::BI_lrotl:
  1248. case Builtin::BI_rotl64: {
  1249. Value *Val = EmitScalarExpr(E->getArg(0));
  1250. Value *Shift = EmitScalarExpr(E->getArg(1));
  1251. llvm::Type *ArgType = Val->getType();
  1252. Shift = Builder.CreateIntCast(Shift, ArgType, false);
  1253. unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
  1254. Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
  1255. Value *ArgZero = llvm::Constant::getNullValue(ArgType);
  1256. Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
  1257. Shift = Builder.CreateAnd(Shift, Mask);
  1258. Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
  1259. Value *LeftShifted = Builder.CreateShl(Val, Shift);
  1260. Value *RightShifted = Builder.CreateLShr(Val, RightShift);
  1261. Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
  1262. Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
  1263. Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
  1264. return RValue::get(Result);
  1265. }
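// Annotation (not from the original source): the rotate-left lowering above is
// equivalent to the following sketch for a hypothetical 32-bit operand. The
// shift amount is masked and the zero-shift case is kept separate so no shift
// by the full bit width (undefined in C, poison in LLVM IR) is ever selected:
//
//   unsigned rotl32(unsigned v, unsigned s) {
//     s &= 31;
//     if (s == 0) return v;
//     return (v << s) | (v >> (32 - s));
//   }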
  1266. case Builtin::BI__builtin_unpredictable: {
  1267. // Always return the argument of __builtin_unpredictable. LLVM does not
  1268. // handle this builtin. Metadata for this builtin should be added directly
  1269. // to instructions such as branches or switches that use it.
  1270. return RValue::get(EmitScalarExpr(E->getArg(0)));
  1271. }
  1272. case Builtin::BI__builtin_expect: {
  1273. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  1274. llvm::Type *ArgType = ArgValue->getType();
  1275. Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
  1276. // Don't generate llvm.expect on -O0 as the backend won't use it for
  1277. // anything.
  1278. // Note, we still IRGen ExpectedValue because it could have side-effects.
  1279. if (CGM.getCodeGenOpts().OptimizationLevel == 0)
  1280. return RValue::get(ArgValue);
  1281. Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
  1282. Value *Result =
  1283. Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
  1284. return RValue::get(Result);
  1285. }
  1286. case Builtin::BI__builtin_assume_aligned: {
  1287. Value *PtrValue = EmitScalarExpr(E->getArg(0));
  1288. Value *OffsetValue =
  1289. (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
  1290. Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
  1291. ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
  1292. unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
  1293. EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
  1294. return RValue::get(PtrValue);
  1295. }
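// Annotation (not from the original source): source-level shape of the builtin
// handled above, with hypothetical names. The optional third argument is the
// offset forwarded to EmitAlignmentAssumption:
//
//   void *q  = __builtin_assume_aligned(p, 64);      // p is 64-byte aligned
//   void *q2 = __builtin_assume_aligned(p, 64, 16);  // p is 16 bytes past a
//                                                    // 64-byte boundary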
  1296. case Builtin::BI__assume:
  1297. case Builtin::BI__builtin_assume: {
  1298. if (E->getArg(0)->HasSideEffects(getContext()))
  1299. return RValue::get(nullptr);
  1300. Value *ArgValue = EmitScalarExpr(E->getArg(0));
  1301. Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
  1302. return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
  1303. }
  1304. case Builtin::BI__builtin_bswap16:
  1305. case Builtin::BI__builtin_bswap32:
  1306. case Builtin::BI__builtin_bswap64: {
  1307. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
  1308. }
  1309. case Builtin::BI__builtin_bitreverse8:
  1310. case Builtin::BI__builtin_bitreverse16:
  1311. case Builtin::BI__builtin_bitreverse32:
  1312. case Builtin::BI__builtin_bitreverse64: {
  1313. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
  1314. }
  1315. case Builtin::BI__builtin_object_size: {
  1316. unsigned Type =
  1317. E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
  1318. auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
1319. // We pass this builtin on to the optimizer so that it can figure out the
  1320. // object size in more complex cases.
  1321. return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
  1322. /*EmittedE=*/nullptr));
  1323. }
  1324. case Builtin::BI__builtin_prefetch: {
  1325. Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
1326. // FIXME: Technically these constants should be of type 'int', yes?
  1327. RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
  1328. llvm::ConstantInt::get(Int32Ty, 0);
  1329. Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
  1330. llvm::ConstantInt::get(Int32Ty, 3);
  1331. Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
  1332. Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
  1333. return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
  1334. }
  1335. case Builtin::BI__builtin_readcyclecounter: {
  1336. Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
  1337. return RValue::get(Builder.CreateCall(F));
  1338. }
  1339. case Builtin::BI__builtin___clear_cache: {
  1340. Value *Begin = EmitScalarExpr(E->getArg(0));
  1341. Value *End = EmitScalarExpr(E->getArg(1));
  1342. Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
  1343. return RValue::get(Builder.CreateCall(F, {Begin, End}));
  1344. }
  1345. case Builtin::BI__builtin_trap:
  1346. return RValue::get(EmitTrapCall(Intrinsic::trap));
  1347. case Builtin::BI__debugbreak:
  1348. return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
  1349. case Builtin::BI__builtin_unreachable: {
  1350. EmitUnreachable(E->getExprLoc());
  1351. // We do need to preserve an insertion point.
  1352. EmitBlock(createBasicBlock("unreachable.cont"));
  1353. return RValue::get(nullptr);
  1354. }
  1355. case Builtin::BI__builtin_powi:
  1356. case Builtin::BI__builtin_powif:
  1357. case Builtin::BI__builtin_powil: {
  1358. Value *Base = EmitScalarExpr(E->getArg(0));
  1359. Value *Exponent = EmitScalarExpr(E->getArg(1));
  1360. llvm::Type *ArgType = Base->getType();
  1361. Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
  1362. return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
  1363. }
  1364. case Builtin::BI__builtin_isgreater:
  1365. case Builtin::BI__builtin_isgreaterequal:
  1366. case Builtin::BI__builtin_isless:
  1367. case Builtin::BI__builtin_islessequal:
  1368. case Builtin::BI__builtin_islessgreater:
  1369. case Builtin::BI__builtin_isunordered: {
  1370. // Ordered comparisons: we know the arguments to these are matching scalar
  1371. // floating point values.
  1372. Value *LHS = EmitScalarExpr(E->getArg(0));
  1373. Value *RHS = EmitScalarExpr(E->getArg(1));
  1374. switch (BuiltinID) {
  1375. default: llvm_unreachable("Unknown ordered comparison");
  1376. case Builtin::BI__builtin_isgreater:
  1377. LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
  1378. break;
  1379. case Builtin::BI__builtin_isgreaterequal:
  1380. LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
  1381. break;
  1382. case Builtin::BI__builtin_isless:
  1383. LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
  1384. break;
  1385. case Builtin::BI__builtin_islessequal:
  1386. LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
  1387. break;
  1388. case Builtin::BI__builtin_islessgreater:
  1389. LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
  1390. break;
  1391. case Builtin::BI__builtin_isunordered:
  1392. LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
  1393. break;
  1394. }
  1395. // ZExt bool to int type.
  1396. return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  1397. }
  1398. case Builtin::BI__builtin_isnan: {
  1399. Value *V = EmitScalarExpr(E->getArg(0));
  1400. V = Builder.CreateFCmpUNO(V, V, "cmp");
  1401. return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  1402. }
  1403. case Builtin::BIfinite:
  1404. case Builtin::BI__finite:
  1405. case Builtin::BIfinitef:
  1406. case Builtin::BI__finitef:
  1407. case Builtin::BIfinitel:
  1408. case Builtin::BI__finitel:
  1409. case Builtin::BI__builtin_isinf:
  1410. case Builtin::BI__builtin_isfinite: {
  1411. // isinf(x) --> fabs(x) == infinity
  1412. // isfinite(x) --> fabs(x) != infinity
  1413. // x != NaN via the ordered compare in either case.
  1414. Value *V = EmitScalarExpr(E->getArg(0));
  1415. Value *Fabs = EmitFAbs(*this, V);
  1416. Constant *Infinity = ConstantFP::getInfinity(V->getType());
  1417. CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
  1418. ? CmpInst::FCMP_OEQ
  1419. : CmpInst::FCMP_ONE;
  1420. Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
  1421. return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
  1422. }
  1423. case Builtin::BI__builtin_isinf_sign: {
  1424. // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
  1425. Value *Arg = EmitScalarExpr(E->getArg(0));
  1426. Value *AbsArg = EmitFAbs(*this, Arg);
  1427. Value *IsInf = Builder.CreateFCmpOEQ(
  1428. AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
  1429. Value *IsNeg = EmitSignBit(*this, Arg);
  1430. llvm::Type *IntTy = ConvertType(E->getType());
  1431. Value *Zero = Constant::getNullValue(IntTy);
  1432. Value *One = ConstantInt::get(IntTy, 1);
  1433. Value *NegativeOne = ConstantInt::get(IntTy, -1);
  1434. Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
  1435. Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
  1436. return RValue::get(Result);
  1437. }
  1438. case Builtin::BI__builtin_isnormal: {
  1439. // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
  1440. Value *V = EmitScalarExpr(E->getArg(0));
  1441. Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
  1442. Value *Abs = EmitFAbs(*this, V);
  1443. Value *IsLessThanInf =
  1444. Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
  1445. APFloat Smallest = APFloat::getSmallestNormalized(
  1446. getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
  1447. Value *IsNormal =
  1448. Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
  1449. "isnormal");
  1450. V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
  1451. V = Builder.CreateAnd(V, IsNormal, "and");
  1452. return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  1453. }
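// Net effect (sketch): isnormal(x) is computed as
//   (x == x) & (fabs(x) < +inf) & (fabs(x) >= smallest-normalized)
// with the smallest normalized value taken from the source type's float
// semantics, so zeros, subnormals, infinities and NaNs all yield 0.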
  1454. case Builtin::BI__builtin_fpclassify: {
  1455. Value *V = EmitScalarExpr(E->getArg(5));
  1456. llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
  1457. // Create Result
  1458. BasicBlock *Begin = Builder.GetInsertBlock();
  1459. BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
  1460. Builder.SetInsertPoint(End);
  1461. PHINode *Result =
  1462. Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
  1463. "fpclassify_result");
  1464. // if (V==0) return FP_ZERO
  1465. Builder.SetInsertPoint(Begin);
  1466. Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
  1467. "iszero");
  1468. Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
  1469. BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
  1470. Builder.CreateCondBr(IsZero, End, NotZero);
  1471. Result->addIncoming(ZeroLiteral, Begin);
  1472. // if (V != V) return FP_NAN
  1473. Builder.SetInsertPoint(NotZero);
  1474. Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
  1475. Value *NanLiteral = EmitScalarExpr(E->getArg(0));
  1476. BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
  1477. Builder.CreateCondBr(IsNan, End, NotNan);
  1478. Result->addIncoming(NanLiteral, NotZero);
  1479. // if (fabs(V) == infinity) return FP_INFINITY
  1480. Builder.SetInsertPoint(NotNan);
  1481. Value *VAbs = EmitFAbs(*this, V);
  1482. Value *IsInf =
  1483. Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
  1484. "isinf");
  1485. Value *InfLiteral = EmitScalarExpr(E->getArg(1));
  1486. BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
  1487. Builder.CreateCondBr(IsInf, End, NotInf);
  1488. Result->addIncoming(InfLiteral, NotNan);
  1489. // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
  1490. Builder.SetInsertPoint(NotInf);
  1491. APFloat Smallest = APFloat::getSmallestNormalized(
  1492. getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
  1493. Value *IsNormal =
  1494. Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
  1495. "isnormal");
  1496. Value *NormalResult =
  1497. Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
  1498. EmitScalarExpr(E->getArg(3)));
  1499. Builder.CreateBr(End);
  1500. Result->addIncoming(NormalResult, NotInf);
  1501. // return Result
  1502. Builder.SetInsertPoint(End);
  1503. return RValue::get(Result);
  1504. }
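// The code above builds a chain of blocks rather than a select tree:
// fpclassify_not_zero, fpclassify_not_nan and fpclassify_not_inf each branch
// to fpclassify_end as soon as a class is known, and the PHI in the end
// block picks the FP_ZERO / FP_NAN / FP_INFINITE literal (args 4, 0 and 1)
// or the normal/subnormal select over args 2 and 3.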
  1505. case Builtin::BIalloca:
  1506. case Builtin::BI_alloca:
  1507. case Builtin::BI__builtin_alloca: {
  1508. Value *Size = EmitScalarExpr(E->getArg(0));
  1509. const TargetInfo &TI = getContext().getTargetInfo();
  1510. // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
  1511. unsigned SuitableAlignmentInBytes =
  1512. CGM.getContext()
  1513. .toCharUnitsFromBits(TI.getSuitableAlign())
  1514. .getQuantity();
  1515. AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
  1516. AI->setAlignment(SuitableAlignmentInBytes);
  1517. return RValue::get(AI);
  1518. }
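// For example, on a target whose __BIGGEST_ALIGNMENT__ is 16 bytes (typical
// for x86-64), __builtin_alloca(n) comes out as roughly
//   %1 = alloca i8, i64 %n, align 16
// i.e. a raw i8 allocation whose alignment is the target's "suitable"
// alignment rather than anything derived from the eventual use.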
  1519. case Builtin::BI__builtin_alloca_with_align: {
  1520. Value *Size = EmitScalarExpr(E->getArg(0));
  1521. Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
  1522. auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
  1523. unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
  1524. unsigned AlignmentInBytes =
  1525. CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
  1526. AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
  1527. AI->setAlignment(AlignmentInBytes);
  1528. return RValue::get(AI);
  1529. }
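// Note that the second argument is an alignment in bits, and the
// cast<ConstantInt> above relies on it having been checked as an integer
// constant earlier. As a sketch, __builtin_alloca_with_align(n, 256) would
// produce
//   %1 = alloca i8, i64 %n, align 32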
  1530. case Builtin::BIbzero:
  1531. case Builtin::BI__builtin_bzero: {
  1532. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  1533. Value *SizeVal = EmitScalarExpr(E->getArg(1));
  1534. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  1535. E->getArg(0)->getExprLoc(), FD, 0);
  1536. Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
  1537. return RValue::get(nullptr);
  1538. }
  1539. case Builtin::BImemcpy:
  1540. case Builtin::BI__builtin_memcpy: {
  1541. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  1542. Address Src = EmitPointerWithAlignment(E->getArg(1));
  1543. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  1544. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  1545. E->getArg(0)->getExprLoc(), FD, 0);
  1546. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
  1547. E->getArg(1)->getExprLoc(), FD, 1);
  1548. Builder.CreateMemCpy(Dest, Src, SizeVal, false);
  1549. return RValue::get(Dest.getPointer());
  1550. }
  1551. case Builtin::BI__builtin_char_memchr:
  1552. BuiltinID = Builtin::BI__builtin_memchr;
  1553. break;
  1554. case Builtin::BI__builtin___memcpy_chk: {
  1555. // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
  1556. llvm::APSInt Size, DstSize;
  1557. if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
  1558. !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
  1559. break;
  1560. if (Size.ugt(DstSize))
  1561. break;
  1562. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  1563. Address Src = EmitPointerWithAlignment(E->getArg(1));
  1564. Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
  1565. Builder.CreateMemCpy(Dest, Src, SizeVal, false);
  1566. return RValue::get(Dest.getPointer());
  1567. }
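// Example of the fold: __builtin___memcpy_chk(d, s, 16, 32) has both sizes
// as constants with 16 <= 32, so it is emitted as a plain 16-byte memcpy.
// If either size is not a compile-time constant, or the copied size exceeds
// the destination size, we 'break' out and fall through to the generic
// library-call handling instead.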
  1568. case Builtin::BI__builtin_objc_memmove_collectable: {
  1569. Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
  1570. Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
  1571. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  1572. CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
  1573. DestAddr, SrcAddr, SizeVal);
  1574. return RValue::get(DestAddr.getPointer());
  1575. }
  1576. case Builtin::BI__builtin___memmove_chk: {
  1577. // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
  1578. llvm::APSInt Size, DstSize;
  1579. if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
  1580. !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
  1581. break;
  1582. if (Size.ugt(DstSize))
  1583. break;
  1584. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  1585. Address Src = EmitPointerWithAlignment(E->getArg(1));
  1586. Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
  1587. Builder.CreateMemMove(Dest, Src, SizeVal, false);
  1588. return RValue::get(Dest.getPointer());
  1589. }
  1590. case Builtin::BImemmove:
  1591. case Builtin::BI__builtin_memmove: {
  1592. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  1593. Address Src = EmitPointerWithAlignment(E->getArg(1));
  1594. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  1595. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  1596. E->getArg(0)->getExprLoc(), FD, 0);
  1597. EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
  1598. E->getArg(1)->getExprLoc(), FD, 1);
  1599. Builder.CreateMemMove(Dest, Src, SizeVal, false);
  1600. return RValue::get(Dest.getPointer());
  1601. }
  1602. case Builtin::BImemset:
  1603. case Builtin::BI__builtin_memset: {
  1604. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  1605. Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
  1606. Builder.getInt8Ty());
  1607. Value *SizeVal = EmitScalarExpr(E->getArg(2));
  1608. EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
  1609. E->getArg(0)->getExprLoc(), FD, 0);
  1610. Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
  1611. return RValue::get(Dest.getPointer());
  1612. }
  1613. case Builtin::BI__builtin___memset_chk: {
  1614. // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
  1615. llvm::APSInt Size, DstSize;
  1616. if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
  1617. !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
  1618. break;
  1619. if (Size.ugt(DstSize))
  1620. break;
  1621. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  1622. Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
  1623. Builder.getInt8Ty());
  1624. Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
  1625. Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
  1626. return RValue::get(Dest.getPointer());
  1627. }
  1628. case Builtin::BI__builtin_wmemcmp: {
  1629. // The MSVC runtime library does not provide a definition of wmemcmp, so we
  1630. // need an inline implementation.
  1631. if (!getTarget().getTriple().isOSMSVCRT())
  1632. break;
  1633. llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
  1634. Value *Dst = EmitScalarExpr(E->getArg(0));
  1635. Value *Src = EmitScalarExpr(E->getArg(1));
  1636. Value *Size = EmitScalarExpr(E->getArg(2));
  1637. BasicBlock *Entry = Builder.GetInsertBlock();
  1638. BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
  1639. BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
  1640. BasicBlock *Next = createBasicBlock("wmemcmp.next");
  1641. BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
  1642. Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
  1643. Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
  1644. EmitBlock(CmpGT);
  1645. PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
  1646. DstPhi->addIncoming(Dst, Entry);
  1647. PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
  1648. SrcPhi->addIncoming(Src, Entry);
  1649. PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
  1650. SizePhi->addIncoming(Size, Entry);
  1651. CharUnits WCharAlign =
  1652. getContext().getTypeAlignInChars(getContext().WCharTy);
  1653. Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
  1654. Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
  1655. Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
  1656. Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
  1657. EmitBlock(CmpLT);
  1658. Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
  1659. Builder.CreateCondBr(DstLtSrc, Exit, Next);
  1660. EmitBlock(Next);
  1661. Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
  1662. Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
  1663. Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
  1664. Value *NextSizeEq0 =
  1665. Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
  1666. Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
  1667. DstPhi->addIncoming(NextDst, Next);
  1668. SrcPhi->addIncoming(NextSrc, Next);
  1669. SizePhi->addIncoming(NextSize, Next);
  1670. EmitBlock(Exit);
  1671. PHINode *Ret = Builder.CreatePHI(IntTy, 4);
  1672. Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
  1673. Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
  1674. Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
  1675. Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
  1676. return RValue::get(Ret);
  1677. }
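// Shape of the expansion (only used for MSVC environments, per the guard
// above): a size == 0 check, then a loop through wmemcmp.gt / wmemcmp.lt /
// wmemcmp.next that compares one wchar_t per iteration with unsigned
// comparisons, exiting with 1, -1 or 0 through the PHI in wmemcmp.exit.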
  1678. case Builtin::BI__builtin_dwarf_cfa: {
  1679. // The offset in bytes from the first argument to the CFA.
  1680. //
  1681. // Why on earth is this in the frontend? Is there any reason at
  1682. // all that the backend can't reasonably determine this while
  1683. // lowering llvm.eh.dwarf.cfa()?
  1684. //
  1685. // TODO: If there's a satisfactory reason, add a target hook for
  1686. // this instead of hard-coding 0, which is correct for most targets.
  1687. int32_t Offset = 0;
  1688. Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
  1689. return RValue::get(Builder.CreateCall(F,
  1690. llvm::ConstantInt::get(Int32Ty, Offset)));
  1691. }
  1692. case Builtin::BI__builtin_return_address: {
  1693. Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
  1694. getContext().UnsignedIntTy);
  1695. Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
  1696. return RValue::get(Builder.CreateCall(F, Depth));
  1697. }
  1698. case Builtin::BI_ReturnAddress: {
  1699. Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
  1700. return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
  1701. }
  1702. case Builtin::BI__builtin_frame_address: {
  1703. Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
  1704. getContext().UnsignedIntTy);
  1705. Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
  1706. return RValue::get(Builder.CreateCall(F, Depth));
  1707. }
  1708. case Builtin::BI__builtin_extract_return_addr: {
  1709. Value *Address = EmitScalarExpr(E->getArg(0));
  1710. Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
  1711. return RValue::get(Result);
  1712. }
  1713. case Builtin::BI__builtin_frob_return_addr: {
  1714. Value *Address = EmitScalarExpr(E->getArg(0));
  1715. Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
  1716. return RValue::get(Result);
  1717. }
  1718. case Builtin::BI__builtin_dwarf_sp_column: {
  1719. llvm::IntegerType *Ty
  1720. = cast<llvm::IntegerType>(ConvertType(E->getType()));
  1721. int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
  1722. if (Column == -1) {
  1723. CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
  1724. return RValue::get(llvm::UndefValue::get(Ty));
  1725. }
  1726. return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  1727. }
  1728. case Builtin::BI__builtin_init_dwarf_reg_size_table: {
  1729. Value *Address = EmitScalarExpr(E->getArg(0));
  1730. if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
  1731. CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
  1732. return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  1733. }
  1734. case Builtin::BI__builtin_eh_return: {
  1735. Value *Int = EmitScalarExpr(E->getArg(0));
  1736. Value *Ptr = EmitScalarExpr(E->getArg(1));
  1737. llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
  1738. assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
  1739. "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
  1740. Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
  1741. ? Intrinsic::eh_return_i32
  1742. : Intrinsic::eh_return_i64);
  1743. Builder.CreateCall(F, {Int, Ptr});
  1744. Builder.CreateUnreachable();
  1745. // We do need to preserve an insertion point.
  1746. EmitBlock(createBasicBlock("builtin_eh_return.cont"));
  1747. return RValue::get(nullptr);
  1748. }
  1749. case Builtin::BI__builtin_unwind_init: {
  1750. Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
  1751. return RValue::get(Builder.CreateCall(F));
  1752. }
  1753. case Builtin::BI__builtin_extend_pointer: {
  1754. // Extends a pointer to the size of an _Unwind_Word, which is
  1755. // uint64_t on all platforms. Generally this gets poked into a
  1756. // register and eventually used as an address, so if the
  1757. // addressing registers are wider than pointers and the platform
  1758. // doesn't implicitly ignore high-order bits when doing
  1759. // addressing, we need to make sure we zext / sext based on
  1760. // the platform's expectations.
  1761. //
  1762. // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
  1763. // Cast the pointer to intptr_t.
  1764. Value *Ptr = EmitScalarExpr(E->getArg(0));
  1765. Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
  1766. // If that's 64 bits, we're done.
  1767. if (IntPtrTy->getBitWidth() == 64)
  1768. return RValue::get(Result);
  1769. // Otherwise, ask the codegen data what to do.
  1770. if (getTargetHooks().extendPointerWithSExt())
  1771. return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
  1772. else
  1773. return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  1774. }
  1775. case Builtin::BI__builtin_setjmp: {
  1776. // Buffer is a void**.
  1777. Address Buf = EmitPointerWithAlignment(E->getArg(0));
  1778. // Store the frame pointer to the setjmp buffer.
  1779. Value *FrameAddr =
  1780. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
  1781. ConstantInt::get(Int32Ty, 0));
  1782. Builder.CreateStore(FrameAddr, Buf);
  1783. // Store the stack pointer to the setjmp buffer.
  1784. Value *StackAddr =
  1785. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
  1786. Address StackSaveSlot =
  1787. Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
  1788. Builder.CreateStore(StackAddr, StackSaveSlot);
  1789. // Call LLVM's EH setjmp, which is lightweight.
  1790. Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
  1791. Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
  1792. return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  1793. }
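// Layout assumed by this lowering (and by the matching __builtin_longjmp
// below): slot 0 of the void** buffer holds the frame address, slot 2 holds
// the stack pointer from @llvm.stacksave, and the remaining jump state is
// filled in by @llvm.eh.sjlj.setjmp itself.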
  1794. case Builtin::BI__builtin_longjmp: {
  1795. Value *Buf = EmitScalarExpr(E->getArg(0));
  1796. Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
  1797. // Call LLVM's EH longjmp, which is lightweight.
  1798. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
  1799. // longjmp doesn't return; mark this as unreachable.
  1800. Builder.CreateUnreachable();
  1801. // We do need to preserve an insertion point.
  1802. EmitBlock(createBasicBlock("longjmp.cont"));
  1803. return RValue::get(nullptr);
  1804. }
  1805. case Builtin::BI__sync_fetch_and_add:
  1806. case Builtin::BI__sync_fetch_and_sub:
  1807. case Builtin::BI__sync_fetch_and_or:
  1808. case Builtin::BI__sync_fetch_and_and:
  1809. case Builtin::BI__sync_fetch_and_xor:
  1810. case Builtin::BI__sync_fetch_and_nand:
  1811. case Builtin::BI__sync_add_and_fetch:
  1812. case Builtin::BI__sync_sub_and_fetch:
  1813. case Builtin::BI__sync_and_and_fetch:
  1814. case Builtin::BI__sync_or_and_fetch:
  1815. case Builtin::BI__sync_xor_and_fetch:
  1816. case Builtin::BI__sync_nand_and_fetch:
  1817. case Builtin::BI__sync_val_compare_and_swap:
  1818. case Builtin::BI__sync_bool_compare_and_swap:
  1819. case Builtin::BI__sync_lock_test_and_set:
  1820. case Builtin::BI__sync_lock_release:
  1821. case Builtin::BI__sync_swap:
  1822. llvm_unreachable("Shouldn't make it through sema");
  1823. case Builtin::BI__sync_fetch_and_add_1:
  1824. case Builtin::BI__sync_fetch_and_add_2:
  1825. case Builtin::BI__sync_fetch_and_add_4:
  1826. case Builtin::BI__sync_fetch_and_add_8:
  1827. case Builtin::BI__sync_fetch_and_add_16:
  1828. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  1829. case Builtin::BI__sync_fetch_and_sub_1:
  1830. case Builtin::BI__sync_fetch_and_sub_2:
  1831. case Builtin::BI__sync_fetch_and_sub_4:
  1832. case Builtin::BI__sync_fetch_and_sub_8:
  1833. case Builtin::BI__sync_fetch_and_sub_16:
  1834. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  1835. case Builtin::BI__sync_fetch_and_or_1:
  1836. case Builtin::BI__sync_fetch_and_or_2:
  1837. case Builtin::BI__sync_fetch_and_or_4:
  1838. case Builtin::BI__sync_fetch_and_or_8:
  1839. case Builtin::BI__sync_fetch_and_or_16:
  1840. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  1841. case Builtin::BI__sync_fetch_and_and_1:
  1842. case Builtin::BI__sync_fetch_and_and_2:
  1843. case Builtin::BI__sync_fetch_and_and_4:
  1844. case Builtin::BI__sync_fetch_and_and_8:
  1845. case Builtin::BI__sync_fetch_and_and_16:
  1846. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  1847. case Builtin::BI__sync_fetch_and_xor_1:
  1848. case Builtin::BI__sync_fetch_and_xor_2:
  1849. case Builtin::BI__sync_fetch_and_xor_4:
  1850. case Builtin::BI__sync_fetch_and_xor_8:
  1851. case Builtin::BI__sync_fetch_and_xor_16:
  1852. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  1853. case Builtin::BI__sync_fetch_and_nand_1:
  1854. case Builtin::BI__sync_fetch_and_nand_2:
  1855. case Builtin::BI__sync_fetch_and_nand_4:
  1856. case Builtin::BI__sync_fetch_and_nand_8:
  1857. case Builtin::BI__sync_fetch_and_nand_16:
  1858. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
  1859. // Clang extensions: not overloaded yet.
  1860. case Builtin::BI__sync_fetch_and_min:
  1861. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  1862. case Builtin::BI__sync_fetch_and_max:
  1863. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  1864. case Builtin::BI__sync_fetch_and_umin:
  1865. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  1866. case Builtin::BI__sync_fetch_and_umax:
  1867. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
  1868. case Builtin::BI__sync_add_and_fetch_1:
  1869. case Builtin::BI__sync_add_and_fetch_2:
  1870. case Builtin::BI__sync_add_and_fetch_4:
  1871. case Builtin::BI__sync_add_and_fetch_8:
  1872. case Builtin::BI__sync_add_and_fetch_16:
  1873. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
  1874. llvm::Instruction::Add);
  1875. case Builtin::BI__sync_sub_and_fetch_1:
  1876. case Builtin::BI__sync_sub_and_fetch_2:
  1877. case Builtin::BI__sync_sub_and_fetch_4:
  1878. case Builtin::BI__sync_sub_and_fetch_8:
  1879. case Builtin::BI__sync_sub_and_fetch_16:
  1880. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
  1881. llvm::Instruction::Sub);
  1882. case Builtin::BI__sync_and_and_fetch_1:
  1883. case Builtin::BI__sync_and_and_fetch_2:
  1884. case Builtin::BI__sync_and_and_fetch_4:
  1885. case Builtin::BI__sync_and_and_fetch_8:
  1886. case Builtin::BI__sync_and_and_fetch_16:
  1887. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
  1888. llvm::Instruction::And);
  1889. case Builtin::BI__sync_or_and_fetch_1:
  1890. case Builtin::BI__sync_or_and_fetch_2:
  1891. case Builtin::BI__sync_or_and_fetch_4:
  1892. case Builtin::BI__sync_or_and_fetch_8:
  1893. case Builtin::BI__sync_or_and_fetch_16:
  1894. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
  1895. llvm::Instruction::Or);
  1896. case Builtin::BI__sync_xor_and_fetch_1:
  1897. case Builtin::BI__sync_xor_and_fetch_2:
  1898. case Builtin::BI__sync_xor_and_fetch_4:
  1899. case Builtin::BI__sync_xor_and_fetch_8:
  1900. case Builtin::BI__sync_xor_and_fetch_16:
  1901. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
  1902. llvm::Instruction::Xor);
  1903. case Builtin::BI__sync_nand_and_fetch_1:
  1904. case Builtin::BI__sync_nand_and_fetch_2:
  1905. case Builtin::BI__sync_nand_and_fetch_4:
  1906. case Builtin::BI__sync_nand_and_fetch_8:
  1907. case Builtin::BI__sync_nand_and_fetch_16:
  1908. return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
  1909. llvm::Instruction::And, true);
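// The trailing 'true' here appears to be the helper's invert flag: the
// atomicrmw returns the old value, the helper recomputes 'old & val', and
// the inversion turns that into ~(old & val), matching the GCC
// __sync_nand_and_fetch semantics.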
  1910. case Builtin::BI__sync_val_compare_and_swap_1:
  1911. case Builtin::BI__sync_val_compare_and_swap_2:
  1912. case Builtin::BI__sync_val_compare_and_swap_4:
  1913. case Builtin::BI__sync_val_compare_and_swap_8:
  1914. case Builtin::BI__sync_val_compare_and_swap_16:
  1915. return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
  1916. case Builtin::BI__sync_bool_compare_and_swap_1:
  1917. case Builtin::BI__sync_bool_compare_and_swap_2:
  1918. case Builtin::BI__sync_bool_compare_and_swap_4:
  1919. case Builtin::BI__sync_bool_compare_and_swap_8:
  1920. case Builtin::BI__sync_bool_compare_and_swap_16:
  1921. return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
  1922. case Builtin::BI__sync_swap_1:
  1923. case Builtin::BI__sync_swap_2:
  1924. case Builtin::BI__sync_swap_4:
  1925. case Builtin::BI__sync_swap_8:
  1926. case Builtin::BI__sync_swap_16:
  1927. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
  1928. case Builtin::BI__sync_lock_test_and_set_1:
  1929. case Builtin::BI__sync_lock_test_and_set_2:
  1930. case Builtin::BI__sync_lock_test_and_set_4:
  1931. case Builtin::BI__sync_lock_test_and_set_8:
  1932. case Builtin::BI__sync_lock_test_and_set_16:
  1933. return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
  1934. case Builtin::BI__sync_lock_release_1:
  1935. case Builtin::BI__sync_lock_release_2:
  1936. case Builtin::BI__sync_lock_release_4:
  1937. case Builtin::BI__sync_lock_release_8:
  1938. case Builtin::BI__sync_lock_release_16: {
  1939. Value *Ptr = EmitScalarExpr(E->getArg(0));
  1940. QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  1941. CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
  1942. llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
  1943. StoreSize.getQuantity() * 8);
  1944. Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  1945. llvm::StoreInst *Store =
  1946. Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
  1947. StoreSize);
  1948. Store->setAtomic(llvm::AtomicOrdering::Release);
  1949. return RValue::get(nullptr);
  1950. }
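// In other words, __sync_lock_release(p) becomes a plain atomic store of
// zero with release ordering at the pointee's width, e.g. for an int*:
//   store atomic i32 0, i32* %p release, align 4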
  1951. case Builtin::BI__sync_synchronize: {
  1952. // We assume this is supposed to correspond to a C++0x-style
  1953. // sequentially-consistent fence (i.e. this is only usable for
  1954. // synchronization, not device I/O or anything like that). This intrinsic
  1955. // is really badly designed in the sense that in theory, there isn't
  1956. // any way to safely use it... but in practice, it mostly works
  1957. // to use it with non-atomic loads and stores to get acquire/release
  1958. // semantics.
  1959. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
  1960. return RValue::get(nullptr);
  1961. }
  1962. case Builtin::BI__builtin_nontemporal_load:
  1963. return RValue::get(EmitNontemporalLoad(*this, E));
  1964. case Builtin::BI__builtin_nontemporal_store:
  1965. return RValue::get(EmitNontemporalStore(*this, E));
  1966. case Builtin::BI__c11_atomic_is_lock_free:
  1967. case Builtin::BI__atomic_is_lock_free: {
  1968. // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
  1969. // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
  1970. // _Atomic(T) is always properly-aligned.
  1971. const char *LibCallName = "__atomic_is_lock_free";
  1972. CallArgList Args;
  1973. Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
  1974. getContext().getSizeType());
  1975. if (BuiltinID == Builtin::BI__atomic_is_lock_free)
  1976. Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
  1977. getContext().VoidPtrTy);
  1978. else
  1979. Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
  1980. getContext().VoidPtrTy);
  1981. const CGFunctionInfo &FuncInfo =
  1982. CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
  1983. llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
  1984. llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
  1985. return EmitCall(FuncInfo, CGCallee::forDirect(Func),
  1986. ReturnValueSlot(), Args);
  1987. }
  1988. case Builtin::BI__atomic_test_and_set: {
  1989. // Look at the argument type to determine whether this is a volatile
  1990. // operation. The parameter type is always volatile.
  1991. QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
  1992. bool Volatile =
  1993. PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
  1994. Value *Ptr = EmitScalarExpr(E->getArg(0));
  1995. unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
  1996. Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
  1997. Value *NewVal = Builder.getInt8(1);
  1998. Value *Order = EmitScalarExpr(E->getArg(1));
  1999. if (isa<llvm::ConstantInt>(Order)) {
  2000. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  2001. AtomicRMWInst *Result = nullptr;
  2002. switch (ord) {
  2003. case 0: // memory_order_relaxed
  2004. default: // invalid order
  2005. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  2006. llvm::AtomicOrdering::Monotonic);
  2007. break;
  2008. case 1: // memory_order_consume
  2009. case 2: // memory_order_acquire
  2010. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  2011. llvm::AtomicOrdering::Acquire);
  2012. break;
  2013. case 3: // memory_order_release
  2014. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  2015. llvm::AtomicOrdering::Release);
  2016. break;
  2017. case 4: // memory_order_acq_rel
  2018. Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  2019. llvm::AtomicOrdering::AcquireRelease);
  2020. break;
  2021. case 5: // memory_order_seq_cst
  2022. Result = Builder.CreateAtomicRMW(
  2023. llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
  2024. llvm::AtomicOrdering::SequentiallyConsistent);
  2025. break;
  2026. }
  2027. Result->setVolatile(Volatile);
  2028. return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  2029. }
  2030. llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
  2031. llvm::BasicBlock *BBs[5] = {
  2032. createBasicBlock("monotonic", CurFn),
  2033. createBasicBlock("acquire", CurFn),
  2034. createBasicBlock("release", CurFn),
  2035. createBasicBlock("acqrel", CurFn),
  2036. createBasicBlock("seqcst", CurFn)
  2037. };
  2038. llvm::AtomicOrdering Orders[5] = {
  2039. llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
  2040. llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
  2041. llvm::AtomicOrdering::SequentiallyConsistent};
  2042. Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  2043. llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
  2044. Builder.SetInsertPoint(ContBB);
  2045. PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
  2046. for (unsigned i = 0; i < 5; ++i) {
  2047. Builder.SetInsertPoint(BBs[i]);
  2048. AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
  2049. Ptr, NewVal, Orders[i]);
  2050. RMW->setVolatile(Volatile);
  2051. Result->addIncoming(RMW, BBs[i]);
  2052. Builder.CreateBr(ContBB);
  2053. }
  2054. SI->addCase(Builder.getInt32(0), BBs[0]);
  2055. SI->addCase(Builder.getInt32(1), BBs[1]);
  2056. SI->addCase(Builder.getInt32(2), BBs[1]);
  2057. SI->addCase(Builder.getInt32(3), BBs[2]);
  2058. SI->addCase(Builder.getInt32(4), BBs[3]);
  2059. SI->addCase(Builder.getInt32(5), BBs[4]);
  2060. Builder.SetInsertPoint(ContBB);
  2061. return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  2062. }
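// Summary of the two paths above: with a constant memory order we emit one
// 'atomicrmw xchg' of i8 1 at the mapped ordering; with a runtime order we
// emit a switch over the five valid orderings, each arm performing the same
// xchg, and merge the old value through the 'was_set' PHI before testing it
// for non-zero.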
  2063. case Builtin::BI__atomic_clear: {
  2064. QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
  2065. bool Volatile =
  2066. PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
  2067. Address Ptr = EmitPointerWithAlignment(E->getArg(0));
  2068. unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
  2069. Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
  2070. Value *NewVal = Builder.getInt8(0);
  2071. Value *Order = EmitScalarExpr(E->getArg(1));
  2072. if (isa<llvm::ConstantInt>(Order)) {
  2073. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  2074. StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
  2075. switch (ord) {
  2076. case 0: // memory_order_relaxed
  2077. default: // invalid order
  2078. Store->setOrdering(llvm::AtomicOrdering::Monotonic);
  2079. break;
  2080. case 3: // memory_order_release
  2081. Store->setOrdering(llvm::AtomicOrdering::Release);
  2082. break;
  2083. case 5: // memory_order_seq_cst
  2084. Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
  2085. break;
  2086. }
  2087. return RValue::get(nullptr);
  2088. }
  2089. llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
  2090. llvm::BasicBlock *BBs[3] = {
  2091. createBasicBlock("monotonic", CurFn),
  2092. createBasicBlock("release", CurFn),
  2093. createBasicBlock("seqcst", CurFn)
  2094. };
  2095. llvm::AtomicOrdering Orders[3] = {
  2096. llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
  2097. llvm::AtomicOrdering::SequentiallyConsistent};
  2098. Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  2099. llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
  2100. for (unsigned i = 0; i < 3; ++i) {
  2101. Builder.SetInsertPoint(BBs[i]);
  2102. StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
  2103. Store->setOrdering(Orders[i]);
  2104. Builder.CreateBr(ContBB);
  2105. }
  2106. SI->addCase(Builder.getInt32(0), BBs[0]);
  2107. SI->addCase(Builder.getInt32(3), BBs[1]);
  2108. SI->addCase(Builder.getInt32(5), BBs[2]);
  2109. Builder.SetInsertPoint(ContBB);
  2110. return RValue::get(nullptr);
  2111. }
  2112. case Builtin::BI__atomic_thread_fence:
  2113. case Builtin::BI__atomic_signal_fence:
  2114. case Builtin::BI__c11_atomic_thread_fence:
  2115. case Builtin::BI__c11_atomic_signal_fence: {
  2116. llvm::SyncScope::ID SSID;
  2117. if (BuiltinID == Builtin::BI__atomic_signal_fence ||
  2118. BuiltinID == Builtin::BI__c11_atomic_signal_fence)
  2119. SSID = llvm::SyncScope::SingleThread;
  2120. else
  2121. SSID = llvm::SyncScope::System;
  2122. Value *Order = EmitScalarExpr(E->getArg(0));
  2123. if (isa<llvm::ConstantInt>(Order)) {
  2124. int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
  2125. switch (ord) {
  2126. case 0: // memory_order_relaxed
  2127. default: // invalid order
  2128. break;
  2129. case 1: // memory_order_consume
  2130. case 2: // memory_order_acquire
  2131. Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
  2132. break;
  2133. case 3: // memory_order_release
  2134. Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
  2135. break;
  2136. case 4: // memory_order_acq_rel
  2137. Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
  2138. break;
  2139. case 5: // memory_order_seq_cst
  2140. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
  2141. break;
  2142. }
  2143. return RValue::get(nullptr);
  2144. }
  2145. llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
  2146. AcquireBB = createBasicBlock("acquire", CurFn);
  2147. ReleaseBB = createBasicBlock("release", CurFn);
  2148. AcqRelBB = createBasicBlock("acqrel", CurFn);
  2149. SeqCstBB = createBasicBlock("seqcst", CurFn);
  2150. llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
  2151. Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  2152. llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
  2153. Builder.SetInsertPoint(AcquireBB);
  2154. Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
  2155. Builder.CreateBr(ContBB);
  2156. SI->addCase(Builder.getInt32(1), AcquireBB);
  2157. SI->addCase(Builder.getInt32(2), AcquireBB);
  2158. Builder.SetInsertPoint(ReleaseBB);
  2159. Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
  2160. Builder.CreateBr(ContBB);
  2161. SI->addCase(Builder.getInt32(3), ReleaseBB);
  2162. Builder.SetInsertPoint(AcqRelBB);
  2163. Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
  2164. Builder.CreateBr(ContBB);
  2165. SI->addCase(Builder.getInt32(4), AcqRelBB);
  2166. Builder.SetInsertPoint(SeqCstBB);
  2167. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
  2168. Builder.CreateBr(ContBB);
  2169. SI->addCase(Builder.getInt32(5), SeqCstBB);
  2170. Builder.SetInsertPoint(ContBB);
  2171. return RValue::get(nullptr);
  2172. }
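// So, for example, __atomic_thread_fence(__ATOMIC_ACQUIRE) becomes a plain
// 'fence acquire', while the signal-fence builtins emit the same fences in
// the single-thread sync scope; a non-constant order expands to the switch
// built above, and relaxed or invalid orders emit no fence at all.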
  2173. case Builtin::BI__builtin_signbit:
  2174. case Builtin::BI__builtin_signbitf:
  2175. case Builtin::BI__builtin_signbitl: {
  2176. return RValue::get(
  2177. Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
  2178. ConvertType(E->getType())));
  2179. }
  2180. case Builtin::BI__annotation: {
  2181. // Re-encode each wide string to UTF8 and make an MDString.
  2182. SmallVector<Metadata *, 1> Strings;
  2183. for (const Expr *Arg : E->arguments()) {
  2184. const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
  2185. assert(Str->getCharByteWidth() == 2);
  2186. StringRef WideBytes = Str->getBytes();
  2187. std::string StrUtf8;
  2188. if (!convertUTF16ToUTF8String(
  2189. makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
  2190. CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
  2191. continue;
  2192. }
  2193. Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
  2194. }
// Build an MDTuple of MDStrings and emit the intrinsic call.
  2196. llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
  2197. MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
  2198. Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
  2199. return RValue::getIgnored();
  2200. }
  2201. case Builtin::BI__builtin_annotation: {
  2202. llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
  2203. llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
  2204. AnnVal->getType());
  2205. // Get the annotation string, go through casts. Sema requires this to be a
  2206. // non-wide string literal, potentially casted, so the cast<> is safe.
  2207. const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
  2208. StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
  2209. return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
  2210. }
  2211. case Builtin::BI__builtin_addcb:
  2212. case Builtin::BI__builtin_addcs:
  2213. case Builtin::BI__builtin_addc:
  2214. case Builtin::BI__builtin_addcl:
  2215. case Builtin::BI__builtin_addcll:
  2216. case Builtin::BI__builtin_subcb:
  2217. case Builtin::BI__builtin_subcs:
  2218. case Builtin::BI__builtin_subc:
  2219. case Builtin::BI__builtin_subcl:
  2220. case Builtin::BI__builtin_subcll: {
  2221. // We translate all of these builtins from expressions of the form:
  2222. // int x = ..., y = ..., carryin = ..., carryout, result;
  2223. // result = __builtin_addc(x, y, carryin, &carryout);
  2224. //
  2225. // to LLVM IR of the form:
  2226. //
  2227. // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  2228. // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
  2229. // %carry1 = extractvalue {i32, i1} %tmp1, 1
  2230. // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
  2231. // i32 %carryin)
  2232. // %result = extractvalue {i32, i1} %tmp2, 0
  2233. // %carry2 = extractvalue {i32, i1} %tmp2, 1
  2234. // %tmp3 = or i1 %carry1, %carry2
  2235. // %tmp4 = zext i1 %tmp3 to i32
  2236. // store i32 %tmp4, i32* %carryout
  2237. // Scalarize our inputs.
  2238. llvm::Value *X = EmitScalarExpr(E->getArg(0));
  2239. llvm::Value *Y = EmitScalarExpr(E->getArg(1));
  2240. llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
  2241. Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
  2242. // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
  2243. llvm::Intrinsic::ID IntrinsicId;
  2244. switch (BuiltinID) {
  2245. default: llvm_unreachable("Unknown multiprecision builtin id.");
  2246. case Builtin::BI__builtin_addcb:
  2247. case Builtin::BI__builtin_addcs:
  2248. case Builtin::BI__builtin_addc:
  2249. case Builtin::BI__builtin_addcl:
  2250. case Builtin::BI__builtin_addcll:
  2251. IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
  2252. break;
  2253. case Builtin::BI__builtin_subcb:
  2254. case Builtin::BI__builtin_subcs:
  2255. case Builtin::BI__builtin_subc:
  2256. case Builtin::BI__builtin_subcl:
  2257. case Builtin::BI__builtin_subcll:
  2258. IntrinsicId = llvm::Intrinsic::usub_with_overflow;
  2259. break;
  2260. }
  2261. // Construct our resulting LLVM IR expression.
  2262. llvm::Value *Carry1;
  2263. llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
  2264. X, Y, Carry1);
  2265. llvm::Value *Carry2;
  2266. llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
  2267. Sum1, Carryin, Carry2);
  2268. llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
  2269. X->getType());
  2270. Builder.CreateStore(CarryOut, CarryOutPtr);
  2271. return RValue::get(Sum2);
  2272. }
  2273. case Builtin::BI__builtin_add_overflow:
  2274. case Builtin::BI__builtin_sub_overflow:
  2275. case Builtin::BI__builtin_mul_overflow: {
  2276. const clang::Expr *LeftArg = E->getArg(0);
  2277. const clang::Expr *RightArg = E->getArg(1);
  2278. const clang::Expr *ResultArg = E->getArg(2);
  2279. clang::QualType ResultQTy =
  2280. ResultArg->getType()->castAs<PointerType>()->getPointeeType();
  2281. WidthAndSignedness LeftInfo =
  2282. getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
  2283. WidthAndSignedness RightInfo =
  2284. getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
  2285. WidthAndSignedness ResultInfo =
  2286. getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
  2287. // Handle mixed-sign multiplication as a special case, because adding
  2288. // runtime or backend support for our generic irgen would be too expensive.
  2289. if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
  2290. return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
  2291. RightInfo, ResultArg, ResultQTy,
  2292. ResultInfo);
  2293. WidthAndSignedness EncompassingInfo =
  2294. EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
  2295. llvm::Type *EncompassingLLVMTy =
  2296. llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
  2297. llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
  2298. llvm::Intrinsic::ID IntrinsicId;
  2299. switch (BuiltinID) {
  2300. default:
  2301. llvm_unreachable("Unknown overflow builtin id.");
  2302. case Builtin::BI__builtin_add_overflow:
  2303. IntrinsicId = EncompassingInfo.Signed
  2304. ? llvm::Intrinsic::sadd_with_overflow
  2305. : llvm::Intrinsic::uadd_with_overflow;
  2306. break;
  2307. case Builtin::BI__builtin_sub_overflow:
  2308. IntrinsicId = EncompassingInfo.Signed
  2309. ? llvm::Intrinsic::ssub_with_overflow
  2310. : llvm::Intrinsic::usub_with_overflow;
  2311. break;
  2312. case Builtin::BI__builtin_mul_overflow:
  2313. IntrinsicId = EncompassingInfo.Signed
  2314. ? llvm::Intrinsic::smul_with_overflow
  2315. : llvm::Intrinsic::umul_with_overflow;
  2316. break;
  2317. }
  2318. llvm::Value *Left = EmitScalarExpr(LeftArg);
  2319. llvm::Value *Right = EmitScalarExpr(RightArg);
  2320. Address ResultPtr = EmitPointerWithAlignment(ResultArg);
  2321. // Extend each operand to the encompassing type.
  2322. Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
  2323. Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
  2324. // Perform the operation on the extended values.
  2325. llvm::Value *Overflow, *Result;
  2326. Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
  2327. if (EncompassingInfo.Width > ResultInfo.Width) {
  2328. // The encompassing type is wider than the result type, so we need to
  2329. // truncate it.
  2330. llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
  2331. // To see if the truncation caused an overflow, we will extend
  2332. // the result and then compare it to the original result.
  2333. llvm::Value *ResultTruncExt = Builder.CreateIntCast(
  2334. ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
  2335. llvm::Value *TruncationOverflow =
  2336. Builder.CreateICmpNE(Result, ResultTruncExt);
  2337. Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
  2338. Result = ResultTrunc;
  2339. }
  2340. // Finally, store the result using the pointer.
  2341. bool isVolatile =
  2342. ResultArg->getType()->getPointeeType().isVolatileQualified();
  2343. Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
  2344. return RValue::get(Overflow);
  2345. }
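// A sketch of the general strategy just implemented: both operands and the
// result type are mapped to one "encompassing" integer type wide enough for
// all three, the signed or unsigned *.with.overflow intrinsic runs at that
// width, and if the encompassing type is wider than the result type a
// truncate-extend-compare step folds any overflow lost in the final
// narrowing into the flag that is returned.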
  2346. case Builtin::BI__builtin_uadd_overflow:
  2347. case Builtin::BI__builtin_uaddl_overflow:
  2348. case Builtin::BI__builtin_uaddll_overflow:
  2349. case Builtin::BI__builtin_usub_overflow:
  2350. case Builtin::BI__builtin_usubl_overflow:
  2351. case Builtin::BI__builtin_usubll_overflow:
  2352. case Builtin::BI__builtin_umul_overflow:
  2353. case Builtin::BI__builtin_umull_overflow:
  2354. case Builtin::BI__builtin_umulll_overflow:
  2355. case Builtin::BI__builtin_sadd_overflow:
  2356. case Builtin::BI__builtin_saddl_overflow:
  2357. case Builtin::BI__builtin_saddll_overflow:
  2358. case Builtin::BI__builtin_ssub_overflow:
  2359. case Builtin::BI__builtin_ssubl_overflow:
  2360. case Builtin::BI__builtin_ssubll_overflow:
  2361. case Builtin::BI__builtin_smul_overflow:
  2362. case Builtin::BI__builtin_smull_overflow:
  2363. case Builtin::BI__builtin_smulll_overflow: {
  2364. // We translate all of these builtins directly to the relevant llvm IR node.
  2365. // Scalarize our inputs.
  2366. llvm::Value *X = EmitScalarExpr(E->getArg(0));
  2367. llvm::Value *Y = EmitScalarExpr(E->getArg(1));
  2368. Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
  2369. // Decide which of the overflow intrinsics we are lowering to:
  2370. llvm::Intrinsic::ID IntrinsicId;
  2371. switch (BuiltinID) {
  2372. default: llvm_unreachable("Unknown overflow builtin id.");
  2373. case Builtin::BI__builtin_uadd_overflow:
  2374. case Builtin::BI__builtin_uaddl_overflow:
  2375. case Builtin::BI__builtin_uaddll_overflow:
  2376. IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
  2377. break;
  2378. case Builtin::BI__builtin_usub_overflow:
  2379. case Builtin::BI__builtin_usubl_overflow:
  2380. case Builtin::BI__builtin_usubll_overflow:
  2381. IntrinsicId = llvm::Intrinsic::usub_with_overflow;
  2382. break;
  2383. case Builtin::BI__builtin_umul_overflow:
  2384. case Builtin::BI__builtin_umull_overflow:
  2385. case Builtin::BI__builtin_umulll_overflow:
  2386. IntrinsicId = llvm::Intrinsic::umul_with_overflow;
  2387. break;
  2388. case Builtin::BI__builtin_sadd_overflow:
  2389. case Builtin::BI__builtin_saddl_overflow:
  2390. case Builtin::BI__builtin_saddll_overflow:
  2391. IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
  2392. break;
  2393. case Builtin::BI__builtin_ssub_overflow:
  2394. case Builtin::BI__builtin_ssubl_overflow:
  2395. case Builtin::BI__builtin_ssubll_overflow:
  2396. IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
  2397. break;
  2398. case Builtin::BI__builtin_smul_overflow:
  2399. case Builtin::BI__builtin_smull_overflow:
  2400. case Builtin::BI__builtin_smulll_overflow:
  2401. IntrinsicId = llvm::Intrinsic::smul_with_overflow;
  2402. break;
  2403. }
  2404. llvm::Value *Carry;
  2405. llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
  2406. Builder.CreateStore(Sum, SumOutPtr);
  2407. return RValue::get(Carry);
  2408. }
  2409. case Builtin::BI__builtin_addressof:
  2410. return RValue::get(EmitLValue(E->getArg(0)).getPointer());
  2411. case Builtin::BI__builtin_operator_new:
  2412. return EmitBuiltinNewDeleteCall(
  2413. E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
  2414. case Builtin::BI__builtin_operator_delete:
  2415. return EmitBuiltinNewDeleteCall(
  2416. E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
  2417. case Builtin::BI__noop:
  2418. // __noop always evaluates to an integer literal zero.
  2419. return RValue::get(ConstantInt::get(IntTy, 0));
  2420. case Builtin::BI__builtin_call_with_static_chain: {
  2421. const CallExpr *Call = cast<CallExpr>(E->getArg(0));
  2422. const Expr *Chain = E->getArg(1);
  2423. return EmitCall(Call->getCallee()->getType(),
  2424. EmitCallee(Call->getCallee()), Call, ReturnValue,
  2425. EmitScalarExpr(Chain));
  2426. }
  2427. case Builtin::BI_InterlockedExchange8:
  2428. case Builtin::BI_InterlockedExchange16:
  2429. case Builtin::BI_InterlockedExchange:
  2430. case Builtin::BI_InterlockedExchangePointer:
  2431. return RValue::get(
  2432. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
  2433. case Builtin::BI_InterlockedCompareExchangePointer: {
  2434. llvm::Type *RTy;
  2435. llvm::IntegerType *IntType =
  2436. IntegerType::get(getLLVMContext(),
  2437. getContext().getTypeSize(E->getType()));
  2438. llvm::Type *IntPtrType = IntType->getPointerTo();
  2439. llvm::Value *Destination =
  2440. Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
  2441. llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
  2442. RTy = Exchange->getType();
  2443. Exchange = Builder.CreatePtrToInt(Exchange, IntType);
  2444. llvm::Value *Comparand =
  2445. Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
  2446. auto Result =
  2447. Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
  2448. AtomicOrdering::SequentiallyConsistent,
  2449. AtomicOrdering::SequentiallyConsistent);
  2450. Result->setVolatile(true);
  2451. return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
  2452. 0),
  2453. RTy));
  2454. }
  2455. case Builtin::BI_InterlockedCompareExchange8:
  2456. case Builtin::BI_InterlockedCompareExchange16:
  2457. case Builtin::BI_InterlockedCompareExchange:
  2458. case Builtin::BI_InterlockedCompareExchange64: {
  2459. AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
  2460. EmitScalarExpr(E->getArg(0)),
  2461. EmitScalarExpr(E->getArg(2)),
  2462. EmitScalarExpr(E->getArg(1)),
  2463. AtomicOrdering::SequentiallyConsistent,
  2464. AtomicOrdering::SequentiallyConsistent);
  2465. CXI->setVolatile(true);
  2466. return RValue::get(Builder.CreateExtractValue(CXI, 0));
  2467. }
  2468. case Builtin::BI_InterlockedIncrement16:
  2469. case Builtin::BI_InterlockedIncrement:
  2470. return RValue::get(
  2471. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
  2472. case Builtin::BI_InterlockedDecrement16:
  2473. case Builtin::BI_InterlockedDecrement:
  2474. return RValue::get(
  2475. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
  2476. case Builtin::BI_InterlockedAnd8:
  2477. case Builtin::BI_InterlockedAnd16:
  2478. case Builtin::BI_InterlockedAnd:
  2479. return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
  2480. case Builtin::BI_InterlockedExchangeAdd8:
  2481. case Builtin::BI_InterlockedExchangeAdd16:
  2482. case Builtin::BI_InterlockedExchangeAdd:
  2483. return RValue::get(
  2484. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
  2485. case Builtin::BI_InterlockedExchangeSub8:
  2486. case Builtin::BI_InterlockedExchangeSub16:
  2487. case Builtin::BI_InterlockedExchangeSub:
  2488. return RValue::get(
  2489. EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
  2490. case Builtin::BI_InterlockedOr8:
  2491. case Builtin::BI_InterlockedOr16:
  2492. case Builtin::BI_InterlockedOr:
  2493. return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
  2494. case Builtin::BI_InterlockedXor8:
  2495. case Builtin::BI_InterlockedXor16:
  2496. case Builtin::BI_InterlockedXor:
  2497. return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
  2498. case Builtin::BI_interlockedbittestandset:
  2499. return RValue::get(
  2500. EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
  2501. case Builtin::BI__exception_code:
  2502. case Builtin::BI_exception_code:
  2503. return RValue::get(EmitSEHExceptionCode());
  2504. case Builtin::BI__exception_info:
  2505. case Builtin::BI_exception_info:
  2506. return RValue::get(EmitSEHExceptionInfo());
  2507. case Builtin::BI__abnormal_termination:
  2508. case Builtin::BI_abnormal_termination:
  2509. return RValue::get(EmitSEHAbnormalTermination());
  2510. case Builtin::BI_setjmpex: {
  2511. if (getTarget().getTriple().isOSMSVCRT()) {
  2512. llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
  2513. llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
  2514. getLLVMContext(), llvm::AttributeList::FunctionIndex,
  2515. llvm::Attribute::ReturnsTwice);
  2516. llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
  2517. llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
  2518. "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
  2519. llvm::Value *Buf = Builder.CreateBitOrPointerCast(
  2520. EmitScalarExpr(E->getArg(0)), Int8PtrTy);
  2521. llvm::Value *FrameAddr =
  2522. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
  2523. ConstantInt::get(Int32Ty, 0));
  2524. llvm::Value *Args[] = {Buf, FrameAddr};
  2525. llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
  2526. CS.setAttributes(ReturnsTwiceAttr);
  2527. return RValue::get(CS.getInstruction());
  2528. }
  2529. break;
  2530. }
  2531. case Builtin::BI_setjmp: {
  2532. if (getTarget().getTriple().isOSMSVCRT()) {
  2533. llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
  2534. getLLVMContext(), llvm::AttributeList::FunctionIndex,
  2535. llvm::Attribute::ReturnsTwice);
  2536. llvm::Value *Buf = Builder.CreateBitOrPointerCast(
  2537. EmitScalarExpr(E->getArg(0)), Int8PtrTy);
  2538. llvm::CallSite CS;
  2539. if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
  2540. llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
  2541. llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
  2542. llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
  2543. "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
  2544. llvm::Value *Count = ConstantInt::get(IntTy, 0);
  2545. llvm::Value *Args[] = {Buf, Count};
  2546. CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
  2547. } else {
  2548. llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
  2549. llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
  2550. llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
  2551. "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
  2552. llvm::Value *FrameAddr =
  2553. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
  2554. ConstantInt::get(Int32Ty, 0));
  2555. llvm::Value *Args[] = {Buf, FrameAddr};
  2556. CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
  2557. }
  2558. CS.setAttributes(ReturnsTwiceAttr);
  2559. return RValue::get(CS.getInstruction());
  2560. }
  2561. break;
  2562. }
  2563. case Builtin::BI__GetExceptionInfo: {
  2564. if (llvm::GlobalVariable *GV =
  2565. CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
  2566. return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
  2567. break;
  2568. }
  2569. case Builtin::BI__fastfail:
  2570. return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
  2571. case Builtin::BI__builtin_coro_size: {
  2572. auto & Context = getContext();
  2573. auto SizeTy = Context.getSizeType();
  2574. auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
  2575. Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
  2576. return RValue::get(Builder.CreateCall(F));
  2577. }
  2578. case Builtin::BI__builtin_coro_id:
  2579. return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
  2580. case Builtin::BI__builtin_coro_promise:
  2581. return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
  2582. case Builtin::BI__builtin_coro_resume:
  2583. return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
  2584. case Builtin::BI__builtin_coro_frame:
  2585. return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
  2586. case Builtin::BI__builtin_coro_noop:
  2587. return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
  2588. case Builtin::BI__builtin_coro_free:
  2589. return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
  2590. case Builtin::BI__builtin_coro_destroy:
  2591. return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
  2592. case Builtin::BI__builtin_coro_done:
  2593. return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
  2594. case Builtin::BI__builtin_coro_alloc:
  2595. return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
  2596. case Builtin::BI__builtin_coro_begin:
  2597. return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
  2598. case Builtin::BI__builtin_coro_end:
  2599. return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
  2600. case Builtin::BI__builtin_coro_suspend:
  2601. return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
  2602. case Builtin::BI__builtin_coro_param:
  2603. return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
  2604. // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
  2605. case Builtin::BIread_pipe:
  2606. case Builtin::BIwrite_pipe: {
  2607. Value *Arg0 = EmitScalarExpr(E->getArg(0)),
  2608. *Arg1 = EmitScalarExpr(E->getArg(1));
  2609. CGOpenCLRuntime OpenCLRT(CGM);
  2610. Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  2611. Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
  2612. // Type of the generic packet parameter.
  2613. unsigned GenericAS =
  2614. getContext().getTargetAddressSpace(LangAS::opencl_generic);
  2615. llvm::Type *I8PTy = llvm::PointerType::get(
  2616. llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2617. // Determine which overloaded version we should generate the call for.
  2618. if (2U == E->getNumArgs()) {
  2619. const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
  2620. : "__write_pipe_2";
2621. // Create a generic function type so the call works with any builtin or
2622. // user-defined type.
  2623. llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
  2624. llvm::FunctionType *FTy = llvm::FunctionType::get(
  2625. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  2626. Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
  2627. return RValue::get(
  2628. Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
  2629. {Arg0, BCast, PacketSize, PacketAlign}));
  2630. } else {
  2631. assert(4 == E->getNumArgs() &&
  2632. "Illegal number of parameters to pipe function");
  2633. const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
  2634. : "__write_pipe_4";
  2635. llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
  2636. Int32Ty, Int32Ty};
  2637. Value *Arg2 = EmitScalarExpr(E->getArg(2)),
  2638. *Arg3 = EmitScalarExpr(E->getArg(3));
  2639. llvm::FunctionType *FTy = llvm::FunctionType::get(
  2640. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  2641. Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
  2642. // We know the third argument is an integer type, but we may need to cast
  2643. // it to i32.
  2644. if (Arg2->getType() != Int32Ty)
  2645. Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
  2646. return RValue::get(Builder.CreateCall(
  2647. CGM.CreateRuntimeFunction(FTy, Name),
  2648. {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
  2649. }
  2650. }
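// Illustrative example (assumption, not part of the original source): for an
// OpenCL call such as
//   read_pipe(p, &val);                       // 2-argument form
// the code above emits, roughly,
//   %r = call i32 @__read_pipe_2(<pipe> %p, i8 addrspace(G)* %valp,
//                                i32 <elem size>, i32 <elem align>)
// where the packet pointer is first cast to an i8* in the generic address
// space (the concrete address-space number G is target-dependent).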
2651. // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
  2652. // functions
  2653. case Builtin::BIreserve_read_pipe:
  2654. case Builtin::BIreserve_write_pipe:
  2655. case Builtin::BIwork_group_reserve_read_pipe:
  2656. case Builtin::BIwork_group_reserve_write_pipe:
  2657. case Builtin::BIsub_group_reserve_read_pipe:
  2658. case Builtin::BIsub_group_reserve_write_pipe: {
2659. // Compose the name of the runtime function to call.
  2660. const char *Name;
  2661. if (BuiltinID == Builtin::BIreserve_read_pipe)
  2662. Name = "__reserve_read_pipe";
  2663. else if (BuiltinID == Builtin::BIreserve_write_pipe)
  2664. Name = "__reserve_write_pipe";
  2665. else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
  2666. Name = "__work_group_reserve_read_pipe";
  2667. else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
  2668. Name = "__work_group_reserve_write_pipe";
  2669. else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
  2670. Name = "__sub_group_reserve_read_pipe";
  2671. else
  2672. Name = "__sub_group_reserve_write_pipe";
  2673. Value *Arg0 = EmitScalarExpr(E->getArg(0)),
  2674. *Arg1 = EmitScalarExpr(E->getArg(1));
  2675. llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
  2676. CGOpenCLRuntime OpenCLRT(CGM);
  2677. Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  2678. Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
  2679. // Building the generic function prototype.
  2680. llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
  2681. llvm::FunctionType *FTy = llvm::FunctionType::get(
  2682. ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  2683. // We know the second argument is an integer type, but we may need to cast
  2684. // it to i32.
  2685. if (Arg1->getType() != Int32Ty)
  2686. Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
  2687. return RValue::get(
  2688. Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
  2689. {Arg0, Arg1, PacketSize, PacketAlign}));
  2690. }
  2691. // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
  2692. // functions
  2693. case Builtin::BIcommit_read_pipe:
  2694. case Builtin::BIcommit_write_pipe:
  2695. case Builtin::BIwork_group_commit_read_pipe:
  2696. case Builtin::BIwork_group_commit_write_pipe:
  2697. case Builtin::BIsub_group_commit_read_pipe:
  2698. case Builtin::BIsub_group_commit_write_pipe: {
  2699. const char *Name;
  2700. if (BuiltinID == Builtin::BIcommit_read_pipe)
  2701. Name = "__commit_read_pipe";
  2702. else if (BuiltinID == Builtin::BIcommit_write_pipe)
  2703. Name = "__commit_write_pipe";
  2704. else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
  2705. Name = "__work_group_commit_read_pipe";
  2706. else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
  2707. Name = "__work_group_commit_write_pipe";
  2708. else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
  2709. Name = "__sub_group_commit_read_pipe";
  2710. else
  2711. Name = "__sub_group_commit_write_pipe";
  2712. Value *Arg0 = EmitScalarExpr(E->getArg(0)),
  2713. *Arg1 = EmitScalarExpr(E->getArg(1));
  2714. CGOpenCLRuntime OpenCLRT(CGM);
  2715. Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  2716. Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
  2717. // Building the generic function prototype.
  2718. llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
  2719. llvm::FunctionType *FTy =
  2720. llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
  2721. llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  2722. return RValue::get(
  2723. Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
  2724. {Arg0, Arg1, PacketSize, PacketAlign}));
  2725. }
  2726. // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
  2727. case Builtin::BIget_pipe_num_packets:
  2728. case Builtin::BIget_pipe_max_packets: {
  2729. const char *BaseName;
  2730. const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
  2731. if (BuiltinID == Builtin::BIget_pipe_num_packets)
  2732. BaseName = "__get_pipe_num_packets";
  2733. else
  2734. BaseName = "__get_pipe_max_packets";
  2735. auto Name = std::string(BaseName) +
  2736. std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
  2737. // Building the generic function prototype.
  2738. Value *Arg0 = EmitScalarExpr(E->getArg(0));
  2739. CGOpenCLRuntime OpenCLRT(CGM);
  2740. Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
  2741. Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
  2742. llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
  2743. llvm::FunctionType *FTy = llvm::FunctionType::get(
  2744. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  2745. return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
  2746. {Arg0, PacketSize, PacketAlign}));
  2747. }
  2748. // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
  2749. case Builtin::BIto_global:
  2750. case Builtin::BIto_local:
  2751. case Builtin::BIto_private: {
  2752. auto Arg0 = EmitScalarExpr(E->getArg(0));
  2753. auto NewArgT = llvm::PointerType::get(Int8Ty,
  2754. CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
  2755. auto NewRetT = llvm::PointerType::get(Int8Ty,
  2756. CGM.getContext().getTargetAddressSpace(
  2757. E->getType()->getPointeeType().getAddressSpace()));
  2758. auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
  2759. llvm::Value *NewArg;
  2760. if (Arg0->getType()->getPointerAddressSpace() !=
  2761. NewArgT->getPointerAddressSpace())
  2762. NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
  2763. else
  2764. NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
  2765. auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
  2766. auto NewCall =
  2767. Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
  2768. return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
  2769. ConvertType(E->getType())));
  2770. }
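// Illustrative sketch (assumption): a call such as to_global(p) is lowered by
// the code above to roughly
//   %q = call i8 addrspace(1)* @__to_global(i8 addrspace(G)* %p)
// i.e. the argument is cast to a generic-address-space i8*, a __to_* runtime
// helper named after the builtin is called, and the result is cast back to the
// source-level pointer type (the concrete address-space numbers are
// target-dependent).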
  2771. // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
  2772. // It contains four different overload formats specified in Table 6.13.17.1.
  2773. case Builtin::BIenqueue_kernel: {
  2774. StringRef Name; // Generated function call name
  2775. unsigned NumArgs = E->getNumArgs();
  2776. llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
  2777. llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
  2778. getContext().getTargetAddressSpace(LangAS::opencl_generic));
  2779. llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
  2780. llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
  2781. LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
  2782. llvm::Value *Range = NDRangeL.getAddress().getPointer();
  2783. llvm::Type *RangeTy = NDRangeL.getAddress().getType();
  2784. if (NumArgs == 4) {
  2785. // The most basic form of the call with parameters:
  2786. // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
  2787. Name = "__enqueue_kernel_basic";
  2788. llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
  2789. GenericVoidPtrTy};
  2790. llvm::FunctionType *FTy = llvm::FunctionType::get(
  2791. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  2792. auto Info =
  2793. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
  2794. llvm::Value *Kernel =
  2795. Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  2796. llvm::Value *Block =
  2797. Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  2798. AttrBuilder B;
  2799. B.addAttribute(Attribute::ByVal);
  2800. llvm::AttributeList ByValAttrSet =
  2801. llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
  2802. auto RTCall =
  2803. Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
  2804. {Queue, Flags, Range, Kernel, Block});
  2805. RTCall->setAttributes(ByValAttrSet);
  2806. return RValue::get(RTCall);
  2807. }
  2808. assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
  2809. // Create a temporary array to hold the sizes of local pointer arguments
  2810. // for the block. \p First is the position of the first size argument.
  2811. auto CreateArrayForSizeVar = [=](unsigned First) {
  2812. auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
  2813. auto *Arr = Builder.CreateAlloca(AT);
  2814. llvm::Value *Ptr;
  2815. // Each of the following arguments specifies the size of the corresponding
  2816. // argument passed to the enqueued block.
  2817. auto *Zero = llvm::ConstantInt::get(IntTy, 0);
  2818. for (unsigned I = First; I < NumArgs; ++I) {
  2819. auto *Index = llvm::ConstantInt::get(IntTy, I - First);
  2820. auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
  2821. if (I == First)
  2822. Ptr = GEP;
  2823. auto *V =
  2824. Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
  2825. Builder.CreateAlignedStore(
  2826. V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
  2827. }
  2828. return Ptr;
  2829. };
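// Illustrative example (assumption): for a variadic call
//   enqueue_kernel(q, flags, range, block, s0, s1);
// CreateArrayForSizeVar(4) allocas a [2 x size_t] array, stores s0 and s1
// (zero-extended or truncated to size_t) into it, and returns a pointer to its
// first element, which is then passed to the __enqueue_kernel_* runtime entry
// point together with the count of variadic arguments.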
  2830. // Could have events and/or vaargs.
  2831. if (E->getArg(3)->getType()->isBlockPointerType()) {
  2832. // No events passed, but has variadic arguments.
  2833. Name = "__enqueue_kernel_vaargs";
  2834. auto Info =
  2835. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
  2836. llvm::Value *Kernel =
  2837. Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  2838. auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  2839. auto *PtrToSizeArray = CreateArrayForSizeVar(4);
  2840. // Create a vector of the arguments, as well as a constant value to
  2841. // express to the runtime the number of variadic arguments.
  2842. std::vector<llvm::Value *> Args = {
  2843. Queue, Flags, Range,
  2844. Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
  2845. PtrToSizeArray};
  2846. std::vector<llvm::Type *> ArgTys = {
  2847. QueueTy, IntTy, RangeTy,
  2848. GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
  2849. PtrToSizeArray->getType()};
  2850. llvm::FunctionType *FTy = llvm::FunctionType::get(
  2851. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  2852. return RValue::get(
  2853. Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
  2854. llvm::ArrayRef<llvm::Value *>(Args)));
  2855. }
  2856. // Any calls now have event arguments passed.
  2857. if (NumArgs >= 7) {
  2858. llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
  2859. llvm::Type *EventPtrTy = EventTy->getPointerTo(
  2860. CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
  2861. llvm::Value *NumEvents =
  2862. Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
  2863. llvm::Value *EventList =
  2864. E->getArg(4)->getType()->isArrayType()
  2865. ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
  2866. : EmitScalarExpr(E->getArg(4));
  2867. llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
  2868. // Convert to generic address space.
  2869. EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
  2870. ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
  2871. auto Info =
  2872. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
  2873. llvm::Value *Kernel =
  2874. Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  2875. llvm::Value *Block =
  2876. Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  2877. std::vector<llvm::Type *> ArgTys = {
  2878. QueueTy, Int32Ty, RangeTy, Int32Ty,
  2879. EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
  2880. std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
  2881. EventList, ClkEvent, Kernel, Block};
  2882. if (NumArgs == 7) {
  2883. // Has events but no variadics.
  2884. Name = "__enqueue_kernel_basic_events";
  2885. llvm::FunctionType *FTy = llvm::FunctionType::get(
  2886. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  2887. return RValue::get(
  2888. Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
  2889. llvm::ArrayRef<llvm::Value *>(Args)));
  2890. }
2891. // Has event info and variadics.
  2892. // Pass the number of variadics to the runtime function too.
  2893. Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
  2894. ArgTys.push_back(Int32Ty);
  2895. Name = "__enqueue_kernel_events_vaargs";
  2896. auto *PtrToSizeArray = CreateArrayForSizeVar(7);
  2897. Args.push_back(PtrToSizeArray);
  2898. ArgTys.push_back(PtrToSizeArray->getType());
  2899. llvm::FunctionType *FTy = llvm::FunctionType::get(
  2900. Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
  2901. return RValue::get(
  2902. Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
  2903. llvm::ArrayRef<llvm::Value *>(Args)));
  2904. }
  2905. LLVM_FALLTHROUGH;
  2906. }
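// Editorial summary of the lowering above: the four overload forms of
// enqueue_kernel from OpenCL 2.0 Table 6.13.17.1 are mapped onto the runtime
// entry points __enqueue_kernel_basic (no events, no variadics),
// __enqueue_kernel_vaargs (variadics only), __enqueue_kernel_basic_events
// (events only) and __enqueue_kernel_events_vaargs (events and variadics).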
  2907. // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
  2908. // parameter.
  2909. case Builtin::BIget_kernel_work_group_size: {
  2910. llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
  2911. getContext().getTargetAddressSpace(LangAS::opencl_generic));
  2912. auto Info =
  2913. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
  2914. Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  2915. Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  2916. return RValue::get(Builder.CreateCall(
  2917. CGM.CreateRuntimeFunction(
  2918. llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
  2919. false),
  2920. "__get_kernel_work_group_size_impl"),
  2921. {Kernel, Arg}));
  2922. }
  2923. case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
  2924. llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
  2925. getContext().getTargetAddressSpace(LangAS::opencl_generic));
  2926. auto Info =
  2927. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
  2928. Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  2929. Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  2930. return RValue::get(Builder.CreateCall(
  2931. CGM.CreateRuntimeFunction(
  2932. llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
  2933. false),
  2934. "__get_kernel_preferred_work_group_multiple_impl"),
  2935. {Kernel, Arg}));
  2936. }
  2937. case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
  2938. case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
  2939. llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
  2940. getContext().getTargetAddressSpace(LangAS::opencl_generic));
  2941. LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
  2942. llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
  2943. auto Info =
  2944. CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
  2945. Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
  2946. Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
  2947. const char *Name =
  2948. BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
  2949. ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
  2950. : "__get_kernel_sub_group_count_for_ndrange_impl";
  2951. return RValue::get(Builder.CreateCall(
  2952. CGM.CreateRuntimeFunction(
  2953. llvm::FunctionType::get(
  2954. IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
  2955. false),
  2956. Name),
  2957. {NDRange, Kernel, Block}));
  2958. }
  2959. case Builtin::BI__builtin_store_half:
  2960. case Builtin::BI__builtin_store_halff: {
  2961. Value *Val = EmitScalarExpr(E->getArg(0));
  2962. Address Address = EmitPointerWithAlignment(E->getArg(1));
  2963. Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
  2964. return RValue::get(Builder.CreateStore(HalfVal, Address));
  2965. }
  2966. case Builtin::BI__builtin_load_half: {
  2967. Address Address = EmitPointerWithAlignment(E->getArg(0));
  2968. Value *HalfVal = Builder.CreateLoad(Address);
  2969. return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
  2970. }
  2971. case Builtin::BI__builtin_load_halff: {
  2972. Address Address = EmitPointerWithAlignment(E->getArg(0));
  2973. Value *HalfVal = Builder.CreateLoad(Address);
  2974. return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
  2975. }
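// Illustrative note (editorial): __builtin_store_half[f] truncates a
// double/float value to half and stores it, while __builtin_load_half[f] loads
// a half and extends it to double (or to float for the 'f' variant), e.g.
//   double d = __builtin_load_half(p);   // load half, then fpext to double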
  2976. case Builtin::BIprintf:
  2977. if (getTarget().getTriple().isNVPTX())
  2978. return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
  2979. break;
  2980. case Builtin::BI__builtin_canonicalize:
  2981. case Builtin::BI__builtin_canonicalizef:
  2982. case Builtin::BI__builtin_canonicalizel:
  2983. return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
  2984. case Builtin::BI__builtin_thread_pointer: {
  2985. if (!getContext().getTargetInfo().isTLSSupported())
  2986. CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
  2987. // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
  2988. break;
  2989. }
  2990. case Builtin::BI__builtin_os_log_format:
  2991. return emitBuiltinOSLogFormat(*E);
  2992. case Builtin::BI__builtin_os_log_format_buffer_size: {
  2993. analyze_os_log::OSLogBufferLayout Layout;
  2994. analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
  2995. return RValue::get(ConstantInt::get(ConvertType(E->getType()),
  2996. Layout.size().getQuantity()));
  2997. }
  2998. case Builtin::BI__xray_customevent: {
  2999. if (!ShouldXRayInstrumentFunction())
  3000. return RValue::getIgnored();
  3001. if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
  3002. XRayInstrKind::Custom))
  3003. return RValue::getIgnored();
  3004. if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
  3005. if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
  3006. return RValue::getIgnored();
  3007. Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
  3008. auto FTy = F->getFunctionType();
  3009. auto Arg0 = E->getArg(0);
  3010. auto Arg0Val = EmitScalarExpr(Arg0);
  3011. auto Arg0Ty = Arg0->getType();
  3012. auto PTy0 = FTy->getParamType(0);
  3013. if (PTy0 != Arg0Val->getType()) {
  3014. if (Arg0Ty->isArrayType())
  3015. Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
  3016. else
  3017. Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
  3018. }
  3019. auto Arg1 = EmitScalarExpr(E->getArg(1));
  3020. auto PTy1 = FTy->getParamType(1);
  3021. if (PTy1 != Arg1->getType())
  3022. Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
  3023. return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
  3024. }
  3025. case Builtin::BI__xray_typedevent: {
  3026. // TODO: There should be a way to always emit events even if the current
  3027. // function is not instrumented. Losing events in a stream can cripple
  3028. // a trace.
  3029. if (!ShouldXRayInstrumentFunction())
  3030. return RValue::getIgnored();
  3031. if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
  3032. XRayInstrKind::Typed))
  3033. return RValue::getIgnored();
  3034. if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
  3035. if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
  3036. return RValue::getIgnored();
  3037. Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
  3038. auto FTy = F->getFunctionType();
  3039. auto Arg0 = EmitScalarExpr(E->getArg(0));
  3040. auto PTy0 = FTy->getParamType(0);
  3041. if (PTy0 != Arg0->getType())
  3042. Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
  3043. auto Arg1 = E->getArg(1);
  3044. auto Arg1Val = EmitScalarExpr(Arg1);
  3045. auto Arg1Ty = Arg1->getType();
  3046. auto PTy1 = FTy->getParamType(1);
  3047. if (PTy1 != Arg1Val->getType()) {
  3048. if (Arg1Ty->isArrayType())
  3049. Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
  3050. else
  3051. Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
  3052. }
  3053. auto Arg2 = EmitScalarExpr(E->getArg(2));
  3054. auto PTy2 = FTy->getParamType(2);
  3055. if (PTy2 != Arg2->getType())
  3056. Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
  3057. return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
  3058. }
  3059. case Builtin::BI__builtin_ms_va_start:
  3060. case Builtin::BI__builtin_ms_va_end:
  3061. return RValue::get(
  3062. EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
  3063. BuiltinID == Builtin::BI__builtin_ms_va_start));
  3064. case Builtin::BI__builtin_ms_va_copy: {
  3065. // Lower this manually. We can't reliably determine whether or not any
  3066. // given va_copy() is for a Win64 va_list from the calling convention
  3067. // alone, because it's legal to do this from a System V ABI function.
  3068. // With opaque pointer types, we won't have enough information in LLVM
  3069. // IR to determine this from the argument types, either. Best to do it
  3070. // now, while we have enough information.
  3071. Address DestAddr = EmitMSVAListRef(E->getArg(0));
  3072. Address SrcAddr = EmitMSVAListRef(E->getArg(1));
  3073. llvm::Type *BPP = Int8PtrPtrTy;
  3074. DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
  3075. DestAddr.getAlignment());
  3076. SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
  3077. SrcAddr.getAlignment());
  3078. Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
  3079. return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
  3080. }
  3081. }
  3082. // If this is an alias for a lib function (e.g. __builtin_sin), emit
  3083. // the call using the normal call path, but using the unmangled
  3084. // version of the function name.
  3085. if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
  3086. return emitLibraryCall(*this, FD, E,
  3087. CGM.getBuiltinLibFunction(FD, BuiltinID));
  3088. // If this is a predefined lib function (e.g. malloc), emit the call
  3089. // using exactly the normal call path.
  3090. if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
  3091. return emitLibraryCall(*this, FD, E,
  3092. cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
  3093. // Check that a call to a target specific builtin has the correct target
  3094. // features.
3095. // This check is done down here so that non-target-specific builtins skip it;
3096. // if generic builtins ever start to require generic target features, this can
3097. // move up to the beginning of the function.
  3098. checkTargetFeatures(E, FD);
  3099. // See if we have a target specific intrinsic.
  3100. const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
  3101. Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  3102. StringRef Prefix =
  3103. llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
  3104. if (!Prefix.empty()) {
  3105. IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
3106. // NOTE: we don't need to perform a compatibility flag check here since the
3107. // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
3108. // MS builtins via ALL_MS_LANGUAGES, so they have already been filtered earlier.
  3109. if (IntrinsicID == Intrinsic::not_intrinsic)
  3110. IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
  3111. }
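// Illustrative example (assumption): on x86 the arch prefix is "x86", so a
// builtin like __builtin_ia32_pause, whose LLVM intrinsic carries a matching
// GCCBuiltin annotation, resolves here to the corresponding Intrinsic:: ID and
// is then emitted through the generic argument/return bitcasting loop below.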
  3112. if (IntrinsicID != Intrinsic::not_intrinsic) {
  3113. SmallVector<Value*, 16> Args;
  3114. // Find out if any arguments are required to be integer constant
  3115. // expressions.
  3116. unsigned ICEArguments = 0;
  3117. ASTContext::GetBuiltinTypeError Error;
  3118. getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  3119. assert(Error == ASTContext::GE_None && "Should not codegen an error");
  3120. Function *F = CGM.getIntrinsic(IntrinsicID);
  3121. llvm::FunctionType *FTy = F->getFunctionType();
  3122. for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
  3123. Value *ArgValue;
  3124. // If this is a normal argument, just emit it as a scalar.
  3125. if ((ICEArguments & (1 << i)) == 0) {
  3126. ArgValue = EmitScalarExpr(E->getArg(i));
  3127. } else {
  3128. // If this is required to be a constant, constant fold it so that we
  3129. // know that the generated intrinsic gets a ConstantInt.
  3130. llvm::APSInt Result;
3131. bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
  3132. assert(IsConst && "Constant arg isn't actually constant?");
  3133. (void)IsConst;
  3134. ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
  3135. }
  3136. // If the intrinsic arg type is different from the builtin arg type
  3137. // we need to do a bit cast.
  3138. llvm::Type *PTy = FTy->getParamType(i);
  3139. if (PTy != ArgValue->getType()) {
  3140. assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
  3141. "Must be able to losslessly bit cast to param");
  3142. ArgValue = Builder.CreateBitCast(ArgValue, PTy);
  3143. }
  3144. Args.push_back(ArgValue);
  3145. }
  3146. Value *V = Builder.CreateCall(F, Args);
  3147. QualType BuiltinRetType = E->getType();
  3148. llvm::Type *RetTy = VoidTy;
  3149. if (!BuiltinRetType->isVoidType())
  3150. RetTy = ConvertType(BuiltinRetType);
  3151. if (RetTy != V->getType()) {
  3152. assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
  3153. "Must be able to losslessly bit cast result type");
  3154. V = Builder.CreateBitCast(V, RetTy);
  3155. }
  3156. return RValue::get(V);
  3157. }
  3158. // See if we have a target specific builtin that needs to be lowered.
  3159. if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
  3160. return RValue::get(V);
  3161. ErrorUnsupported(E, "builtin function");
  3162. // Unknown builtin, for now just dump it out and return undef.
  3163. return GetUndefRValue(E->getType());
  3164. }
  3165. static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
  3166. unsigned BuiltinID, const CallExpr *E,
  3167. llvm::Triple::ArchType Arch) {
  3168. switch (Arch) {
  3169. case llvm::Triple::arm:
  3170. case llvm::Triple::armeb:
  3171. case llvm::Triple::thumb:
  3172. case llvm::Triple::thumbeb:
  3173. return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
  3174. case llvm::Triple::aarch64:
  3175. case llvm::Triple::aarch64_be:
  3176. return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
  3177. case llvm::Triple::x86:
  3178. case llvm::Triple::x86_64:
  3179. return CGF->EmitX86BuiltinExpr(BuiltinID, E);
  3180. case llvm::Triple::ppc:
  3181. case llvm::Triple::ppc64:
  3182. case llvm::Triple::ppc64le:
  3183. return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
  3184. case llvm::Triple::r600:
  3185. case llvm::Triple::amdgcn:
  3186. return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
  3187. case llvm::Triple::systemz:
  3188. return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
  3189. case llvm::Triple::nvptx:
  3190. case llvm::Triple::nvptx64:
  3191. return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
  3192. case llvm::Triple::wasm32:
  3193. case llvm::Triple::wasm64:
  3194. return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
  3195. case llvm::Triple::hexagon:
  3196. return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
  3197. default:
  3198. return nullptr;
  3199. }
  3200. }
  3201. Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
  3202. const CallExpr *E) {
  3203. if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
  3204. assert(getContext().getAuxTargetInfo() && "Missing aux target info");
  3205. return EmitTargetArchBuiltinExpr(
  3206. this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
  3207. getContext().getAuxTargetInfo()->getTriple().getArch());
  3208. }
  3209. return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
  3210. getTarget().getTriple().getArch());
  3211. }
  3212. static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
  3213. NeonTypeFlags TypeFlags,
  3214. bool HasLegalHalfType=true,
  3215. bool V1Ty=false) {
  3216. int IsQuad = TypeFlags.isQuad();
  3217. switch (TypeFlags.getEltType()) {
  3218. case NeonTypeFlags::Int8:
  3219. case NeonTypeFlags::Poly8:
  3220. return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  3221. case NeonTypeFlags::Int16:
  3222. case NeonTypeFlags::Poly16:
  3223. return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  3224. case NeonTypeFlags::Float16:
  3225. if (HasLegalHalfType)
  3226. return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
  3227. else
  3228. return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  3229. case NeonTypeFlags::Int32:
  3230. return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  3231. case NeonTypeFlags::Int64:
  3232. case NeonTypeFlags::Poly64:
  3233. return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  3234. case NeonTypeFlags::Poly128:
3235. // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
3236. // much of the i128 and f128 API is missing,
3237. // so we use v16i8 to represent poly128 and rely on pattern matching.
  3238. return llvm::VectorType::get(CGF->Int8Ty, 16);
  3239. case NeonTypeFlags::Float32:
  3240. return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  3241. case NeonTypeFlags::Float64:
  3242. return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  3243. }
  3244. llvm_unreachable("Unknown vector element type!");
  3245. }
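// Worked example (editorial note): NeonTypeFlags with element type Int32 and
// the quad bit set yields llvm::VectorType::get(Int32Ty, 2 << 1), i.e. a
// <4 x i32> vector, while the non-quad form yields <2 x i32>.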
  3246. static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
  3247. NeonTypeFlags IntTypeFlags) {
  3248. int IsQuad = IntTypeFlags.isQuad();
  3249. switch (IntTypeFlags.getEltType()) {
  3250. case NeonTypeFlags::Int16:
  3251. return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
  3252. case NeonTypeFlags::Int32:
  3253. return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
  3254. case NeonTypeFlags::Int64:
  3255. return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
  3256. default:
  3257. llvm_unreachable("Type can't be converted to floating-point!");
  3258. }
  3259. }
  3260. Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  3261. unsigned nElts = V->getType()->getVectorNumElements();
  3262. Value* SV = llvm::ConstantVector::getSplat(nElts, C);
  3263. return Builder.CreateShuffleVector(V, V, SV, "lane");
  3264. }
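// Illustrative note (editorial): EmitNeonSplat builds a shufflevector whose
// mask is the lane constant C replicated to the vector width; e.g. splatting
// lane 1 of a <4 x i32> value uses the mask <i32 1, i32 1, i32 1, i32 1>,
// duplicating that lane into every element of the result.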
  3265. Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
  3266. const char *name,
  3267. unsigned shift, bool rightshift) {
  3268. unsigned j = 0;
  3269. for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
  3270. ai != ae; ++ai, ++j)
  3271. if (shift > 0 && shift == j)
  3272. Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
  3273. else
  3274. Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  3275. return Builder.CreateCall(F, Ops, name);
  3276. }
  3277. Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
  3278. bool neg) {
  3279. int SV = cast<ConstantInt>(V)->getSExtValue();
  3280. return ConstantInt::get(Ty, neg ? -SV : SV);
  3281. }
3282. /// Right-shift a vector by a constant.
  3283. Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
  3284. llvm::Type *Ty, bool usgn,
  3285. const char *name) {
  3286. llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  3287. int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
  3288. int EltSize = VTy->getScalarSizeInBits();
  3289. Vec = Builder.CreateBitCast(Vec, Ty);
  3290. // lshr/ashr are undefined when the shift amount is equal to the vector
  3291. // element size.
  3292. if (ShiftAmt == EltSize) {
  3293. if (usgn) {
  3294. // Right-shifting an unsigned value by its size yields 0.
  3295. return llvm::ConstantAggregateZero::get(VTy);
  3296. } else {
  3297. // Right-shifting a signed value by its size is equivalent
  3298. // to a shift of size-1.
  3299. --ShiftAmt;
  3300. Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
  3301. }
  3302. }
  3303. Shift = EmitNeonShiftVector(Shift, Ty, false);
  3304. if (usgn)
  3305. return Builder.CreateLShr(Vec, Shift, name);
  3306. else
  3307. return Builder.CreateAShr(Vec, Shift, name);
  3308. }
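// Worked example (editorial note): for a signed <4 x i32> right shift by the
// constant 32 (the element size), the code above clamps the amount to 31 and
// emits an ashr, whereas the unsigned case folds directly to the zero vector,
// matching the semantics of the NEON right-shift-by-immediate forms.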
  3309. enum {
  3310. AddRetType = (1 << 0),
  3311. Add1ArgType = (1 << 1),
  3312. Add2ArgTypes = (1 << 2),
  3313. VectorizeRetType = (1 << 3),
  3314. VectorizeArgTypes = (1 << 4),
  3315. InventFloatType = (1 << 5),
  3316. UnsignedAlts = (1 << 6),
  3317. Use64BitVectors = (1 << 7),
  3318. Use128BitVectors = (1 << 8),
  3319. Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
  3320. VectorRet = AddRetType | VectorizeRetType,
  3321. VectorRetGetArgs01 =
  3322. AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
  3323. FpCmpzModifiers =
  3324. AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
  3325. };
  3326. namespace {
  3327. struct NeonIntrinsicInfo {
  3328. const char *NameHint;
  3329. unsigned BuiltinID;
  3330. unsigned LLVMIntrinsic;
  3331. unsigned AltLLVMIntrinsic;
  3332. unsigned TypeModifier;
  3333. bool operator<(unsigned RHSBuiltinID) const {
  3334. return BuiltinID < RHSBuiltinID;
  3335. }
  3336. bool operator<(const NeonIntrinsicInfo &TE) const {
  3337. return BuiltinID < TE.BuiltinID;
  3338. }
  3339. };
  3340. } // end anonymous namespace
  3341. #define NEONMAP0(NameBase) \
  3342. { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
  3343. #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  3344. { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
  3345. Intrinsic::LLVMIntrinsic, 0, TypeModifier }
  3346. #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  3347. { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
  3348. Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
  3349. TypeModifier }
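// Illustrative expansion (editorial note): an entry such as
//   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts)
// expands to
//   { "vabd_v", NEON::BI__builtin_neon_vabd_v, Intrinsic::arm_neon_vabdu,
//     Intrinsic::arm_neon_vabds, Add1ArgType | UnsignedAlts }
// i.e., as the intrinsic names suggest, the first intrinsic serves the unsigned
// element types and the alternative the signed ones, selected via UnsignedAlts.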
  3350. static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  3351. NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  3352. NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
  3353. NEONMAP1(vabs_v, arm_neon_vabs, 0),
  3354. NEONMAP1(vabsq_v, arm_neon_vabs, 0),
  3355. NEONMAP0(vaddhn_v),
  3356. NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
  3357. NEONMAP1(vaeseq_v, arm_neon_aese, 0),
  3358. NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
  3359. NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  3360. NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  3361. NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  3362. NEONMAP1(vcage_v, arm_neon_vacge, 0),
  3363. NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  3364. NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
  3365. NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
  3366. NEONMAP1(vcale_v, arm_neon_vacge, 0),
  3367. NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
  3368. NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
  3369. NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
  3370. NEONMAP0(vceqz_v),
  3371. NEONMAP0(vceqzq_v),
  3372. NEONMAP0(vcgez_v),
  3373. NEONMAP0(vcgezq_v),
  3374. NEONMAP0(vcgtz_v),
  3375. NEONMAP0(vcgtzq_v),
  3376. NEONMAP0(vclez_v),
  3377. NEONMAP0(vclezq_v),
  3378. NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
  3379. NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
  3380. NEONMAP0(vcltz_v),
  3381. NEONMAP0(vcltzq_v),
  3382. NEONMAP1(vclz_v, ctlz, Add1ArgType),
  3383. NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  3384. NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  3385. NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  3386. NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
  3387. NEONMAP0(vcvt_f16_v),
  3388. NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
  3389. NEONMAP0(vcvt_f32_v),
  3390. NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  3391. NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  3392. NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  3393. NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  3394. NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  3395. NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  3396. NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  3397. NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  3398. NEONMAP0(vcvt_s16_v),
  3399. NEONMAP0(vcvt_s32_v),
  3400. NEONMAP0(vcvt_s64_v),
  3401. NEONMAP0(vcvt_u16_v),
  3402. NEONMAP0(vcvt_u32_v),
  3403. NEONMAP0(vcvt_u64_v),
  3404. NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
  3405. NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
  3406. NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
  3407. NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
  3408. NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
  3409. NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
  3410. NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
  3411. NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
  3412. NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
  3413. NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
  3414. NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
  3415. NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
  3416. NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
  3417. NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
  3418. NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
  3419. NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
  3420. NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
  3421. NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
  3422. NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
  3423. NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
  3424. NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
  3425. NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
  3426. NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
  3427. NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
  3428. NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
  3429. NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
  3430. NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
  3431. NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
  3432. NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
  3433. NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
  3434. NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
  3435. NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
  3436. NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
  3437. NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
  3438. NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
  3439. NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
  3440. NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
  3441. NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
  3442. NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
  3443. NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
  3444. NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
  3445. NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
  3446. NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
  3447. NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
  3448. NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
  3449. NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
  3450. NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
  3451. NEONMAP0(vcvtq_f16_v),
  3452. NEONMAP0(vcvtq_f32_v),
  3453. NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  3454. NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
  3455. NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
  3456. NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
  3457. NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
  3458. NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
  3459. NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
  3460. NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
  3461. NEONMAP0(vcvtq_s16_v),
  3462. NEONMAP0(vcvtq_s32_v),
  3463. NEONMAP0(vcvtq_s64_v),
  3464. NEONMAP0(vcvtq_u16_v),
  3465. NEONMAP0(vcvtq_u32_v),
  3466. NEONMAP0(vcvtq_u64_v),
  3467. NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
  3468. NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
  3469. NEONMAP0(vext_v),
  3470. NEONMAP0(vextq_v),
  3471. NEONMAP0(vfma_v),
  3472. NEONMAP0(vfmaq_v),
  3473. NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  3474. NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
  3475. NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  3476. NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
  3477. NEONMAP0(vld1_dup_v),
  3478. NEONMAP1(vld1_v, arm_neon_vld1, 0),
  3479. NEONMAP0(vld1q_dup_v),
  3480. NEONMAP1(vld1q_v, arm_neon_vld1, 0),
  3481. NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
  3482. NEONMAP1(vld2_v, arm_neon_vld2, 0),
  3483. NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
  3484. NEONMAP1(vld2q_v, arm_neon_vld2, 0),
  3485. NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
  3486. NEONMAP1(vld3_v, arm_neon_vld3, 0),
  3487. NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
  3488. NEONMAP1(vld3q_v, arm_neon_vld3, 0),
  3489. NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
  3490. NEONMAP1(vld4_v, arm_neon_vld4, 0),
  3491. NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
  3492. NEONMAP1(vld4q_v, arm_neon_vld4, 0),
  3493. NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  3494. NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
  3495. NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
  3496. NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
  3497. NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  3498. NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
  3499. NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
  3500. NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
  3501. NEONMAP0(vmovl_v),
  3502. NEONMAP0(vmovn_v),
  3503. NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
  3504. NEONMAP0(vmull_v),
  3505. NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
  3506. NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  3507. NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
  3508. NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
  3509. NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  3510. NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
  3511. NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
  3512. NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
  3513. NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
  3514. NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
  3515. NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
  3516. NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  3517. NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
  3518. NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
  3519. NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
  3520. NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
  3521. NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
  3522. NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
  3523. NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
  3524. NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
  3525. NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
  3526. NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
  3527. NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
  3528. NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
  3529. NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  3530. NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
  3531. NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  3532. NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  3533. NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
  3534. NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
  3535. NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
  3536. NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
  3537. NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  3538. NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
  3539. NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
  3540. NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  3541. NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
  3542. NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
  3543. NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
  3544. NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  3545. NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
  3546. NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
  3547. NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
  3548. NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
  3549. NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
  3550. NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
  3551. NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
  3552. NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
  3553. NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
  3554. NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
  3555. NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
  3556. NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
  3557. NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
  3558. NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  3559. NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
  3560. NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  3561. NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
  3562. NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  3563. NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
  3564. NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
  3565. NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
  3566. NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
  3567. NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
  3568. NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
  3569. NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
  3570. NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
  3571. NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
  3572. NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
  3573. NEONMAP0(vshl_n_v),
  3574. NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  3575. NEONMAP0(vshll_n_v),
  3576. NEONMAP0(vshlq_n_v),
  3577. NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
  3578. NEONMAP0(vshr_n_v),
  3579. NEONMAP0(vshrn_n_v),
  3580. NEONMAP0(vshrq_n_v),
  3581. NEONMAP1(vst1_v, arm_neon_vst1, 0),
  3582. NEONMAP1(vst1q_v, arm_neon_vst1, 0),
  3583. NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
  3584. NEONMAP1(vst2_v, arm_neon_vst2, 0),
  3585. NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
  3586. NEONMAP1(vst2q_v, arm_neon_vst2, 0),
  3587. NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
  3588. NEONMAP1(vst3_v, arm_neon_vst3, 0),
  3589. NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
  3590. NEONMAP1(vst3q_v, arm_neon_vst3, 0),
  3591. NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
  3592. NEONMAP1(vst4_v, arm_neon_vst4, 0),
  3593. NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
  3594. NEONMAP1(vst4q_v, arm_neon_vst4, 0),
  3595. NEONMAP0(vsubhn_v),
  3596. NEONMAP0(vtrn_v),
  3597. NEONMAP0(vtrnq_v),
  3598. NEONMAP0(vtst_v),
  3599. NEONMAP0(vtstq_v),
  3600. NEONMAP0(vuzp_v),
  3601. NEONMAP0(vuzpq_v),
  3602. NEONMAP0(vzip_v),
  3603. NEONMAP0(vzipq_v)
  3604. };
  3605. static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  3606. NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  3607. NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  3608. NEONMAP0(vaddhn_v),
  3609. NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  3610. NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  3611. NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  3612. NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  3613. NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  3614. NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  3615. NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  3616. NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  3617. NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  3618. NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  3619. NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  3620. NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  3621. NEONMAP0(vceqz_v),
  3622. NEONMAP0(vceqzq_v),
  3623. NEONMAP0(vcgez_v),
  3624. NEONMAP0(vcgezq_v),
  3625. NEONMAP0(vcgtz_v),
  3626. NEONMAP0(vcgtzq_v),
  3627. NEONMAP0(vclez_v),
  3628. NEONMAP0(vclezq_v),
  3629. NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  3630. NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  3631. NEONMAP0(vcltz_v),
  3632. NEONMAP0(vcltzq_v),
  3633. NEONMAP1(vclz_v, ctlz, Add1ArgType),
  3634. NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  3635. NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  3636. NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  3637. NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  3638. NEONMAP0(vcvt_f16_v),
  3639. NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  3640. NEONMAP0(vcvt_f32_v),
  3641. NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  3642. NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  3643. NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  3644. NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  3645. NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  3646. NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  3647. NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  3648. NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  3649. NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  3650. NEONMAP0(vcvtq_f16_v),
  3651. NEONMAP0(vcvtq_f32_v),
  3652. NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  3653. NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  3654. NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  3655. NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
  3656. NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  3657. NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  3658. NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
  3659. NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  3660. NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  3661. NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  3662. NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
  3663. NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
  3664. NEONMAP0(vext_v),
  3665. NEONMAP0(vextq_v),
  3666. NEONMAP0(vfma_v),
  3667. NEONMAP0(vfmaq_v),
  3668. NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  3669. NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  3670. NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  3671. NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  3672. NEONMAP0(vmovl_v),
  3673. NEONMAP0(vmovn_v),
  3674. NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  3675. NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  3676. NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  3677. NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  3678. NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  3679. NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  3680. NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  3681. NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  3682. NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  3683. NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  3684. NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  3685. NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  3686. NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  3687. NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  3688. NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  3689. NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  3690. NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  3691. NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  3692. NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  3693. NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  3694. NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  3695. NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  3696. NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  3697. NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  3698. NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3699. NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  3700. NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  3701. NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  3702. NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  3703. NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  3704. NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  3705. NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  3706. NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  3707. NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  3708. NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  3709. NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  3710. NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  3711. NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  3712. NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  3713. NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  3714. NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  3715. NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  3716. NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  3717. NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  3718. NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  3719. NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  3720. NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  3721. NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  3722. NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  3723. NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  3724. NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  3725. NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  3726. NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  3727. NEONMAP0(vshl_n_v),
  3728. NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  3729. NEONMAP0(vshll_n_v),
  3730. NEONMAP0(vshlq_n_v),
  3731. NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  3732. NEONMAP0(vshr_n_v),
  3733. NEONMAP0(vshrn_n_v),
  3734. NEONMAP0(vshrq_n_v),
  3735. NEONMAP0(vsubhn_v),
  3736. NEONMAP0(vtst_v),
  3737. NEONMAP0(vtstq_v),
  3738. };
  3739. static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  3740. NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  3741. NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  3742. NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  3743. NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  3744. NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  3745. NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  3746. NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  3747. NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  3748. NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  3749. NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  3750. NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  3751. NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  3752. NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  3753. NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  3754. NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  3755. NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  3756. NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  3757. NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  3758. NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  3759. NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  3760. NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  3761. NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  3762. NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  3763. NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  3764. NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  3765. NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  3766. NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  3767. NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  3768. NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  3769. NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  3770. NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  3771. NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  3772. NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  3773. NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  3774. NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  3775. NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  3776. NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  3777. NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  3778. NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  3779. NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  3780. NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  3781. NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  3782. NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  3783. NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  3784. NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  3785. NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  3786. NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  3787. NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  3788. NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  3789. NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  3790. NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  3791. NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  3792. NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  3793. NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  3794. NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  3795. NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  3796. NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  3797. NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  3798. NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  3799. NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  3800. NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  3801. NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  3802. NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  3803. NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  3804. NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  3805. NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  3806. NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  3807. NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  3808. NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  3809. NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  3810. NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  3811. NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  3812. NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  3813. NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  3814. NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  3815. NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  3816. NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  3817. NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  3818. NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  3819. NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  3820. NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  3821. NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  3822. NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  3823. NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  3824. NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  3825. NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  3826. NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  3827. NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  3828. NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  3829. NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  3830. NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  3831. NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  3832. NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  3833. NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  3834. NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  3835. NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  3836. NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  3837. NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  3838. NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  3839. NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  3840. NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  3841. NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  3842. NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  3843. NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  3844. NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  3845. NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  3846. NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  3847. NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  3848. NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  3849. NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  3850. NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  3851. NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  3852. NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  3853. NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  3854. NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  3855. NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  3856. NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  3857. NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  3858. NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  3859. NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  3860. NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  3861. NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  3862. NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  3863. NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  3864. NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  3865. NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  3866. NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  3867. NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  3868. NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  3869. NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  3870. NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  3871. NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  3872. NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  3873. NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  3874. NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  3875. NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  3876. NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  3877. NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  3878. NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  3879. NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  3880. NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  3881. NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  3882. NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  3883. NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  3884. NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  3885. NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  3886. NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  3887. NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  3888. NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  3889. NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  3890. NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  3891. NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  3892. NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  3893. NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  3894. NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  3895. NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  3896. NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  3897. NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  3898. NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  3899. NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  3900. NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  3901. NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  3902. NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  3903. NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  3904. NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  3905. NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  3906. NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  3907. NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  3908. NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  3909. NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  3910. NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  3911. NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  3912. NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  3913. NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  3914. NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  3915. NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  3916. NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  3917. NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  3918. NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  3919. NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  3920. NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  3921. NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  3922. NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  3923. NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  3924. NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  3925. NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  3926. NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  3927. NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  3928. NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  3929. NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  3930. NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  3931. NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3932. // FP16 scalar intrinsics go here.
  3933. NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
  3934. NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  3935. NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  3936. NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  3937. NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  3938. NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  3939. NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  3940. NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  3941. NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  3942. NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  3943. NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  3944. NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  3945. NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  3946. NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  3947. NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  3948. NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  3949. NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  3950. NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  3951. NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  3952. NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  3953. NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  3954. NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  3955. NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  3956. NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  3957. NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  3958. NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
  3959. NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
  3960. NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
  3961. NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
  3962. NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
  3963. };
  3964. #undef NEONMAP0
  3965. #undef NEONMAP1
  3966. #undef NEONMAP2
  3967. static bool NEONSIMDIntrinsicsProvenSorted = false;
  3968. static bool AArch64SIMDIntrinsicsProvenSorted = false;
  3969. static bool AArch64SISDIntrinsicsProvenSorted = false;
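// Look up BuiltinID in a table of NeonIntrinsicInfo entries. The table must
// be sorted by BuiltinID so std::lower_bound can be used; in asserts builds
// the ordering is verified once per map on the first lookup.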
  3970. static const NeonIntrinsicInfo *
  3971. findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
  3972. unsigned BuiltinID, bool &MapProvenSorted) {
  3973. #ifndef NDEBUG
  3974. if (!MapProvenSorted) {
  3975. assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
  3976. MapProvenSorted = true;
  3977. }
  3978. #endif
  3979. const NeonIntrinsicInfo *Builtin =
  3980. std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
  3981. if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
  3982. return Builtin;
  3983. return nullptr;
  3984. }
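// Build the concrete overload of an LLVM intrinsic from the TypeModifier
// flags recorded in the map: AddRetType appends the call's return type,
// Add1ArgType/Add2ArgTypes append the argument type once or twice,
// VectorizeRetType/VectorizeArgTypes widen scalars to 64- or 128-bit
// vectors, and InventFloatType appends a float type.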
  3985. Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
  3986. unsigned Modifier,
  3987. llvm::Type *ArgType,
  3988. const CallExpr *E) {
  3989. int VectorSize = 0;
  3990. if (Modifier & Use64BitVectors)
  3991. VectorSize = 64;
  3992. else if (Modifier & Use128BitVectors)
  3993. VectorSize = 128;
  3994. // Return type.
  3995. SmallVector<llvm::Type *, 3> Tys;
  3996. if (Modifier & AddRetType) {
  3997. llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
  3998. if (Modifier & VectorizeRetType)
  3999. Ty = llvm::VectorType::get(
  4000. Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
  4001. Tys.push_back(Ty);
  4002. }
  4003. // Arguments.
  4004. if (Modifier & VectorizeArgTypes) {
  4005. int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
  4006. ArgType = llvm::VectorType::get(ArgType, Elts);
  4007. }
  4008. if (Modifier & (Add1ArgType | Add2ArgTypes))
  4009. Tys.push_back(ArgType);
  4010. if (Modifier & Add2ArgTypes)
  4011. Tys.push_back(ArgType);
  4012. if (Modifier & InventFloatType)
  4013. Tys.push_back(FloatTy);
  4014. return CGM.getIntrinsic(IntrinsicID, Tys);
  4015. }
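// Shared emission path for the scalar (SISD) intrinsics in the table above.
// vcle/vclt style builtins are mapped to cmge/cmgt intrinsics, so their
// operands are swapped first; scalar operands are widened to one-element
// vectors where the intrinsic expects a vector, and a one-element vector
// result is narrowed back to the builtin's scalar type by extracting lane 0.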
  4016. static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
  4017. const NeonIntrinsicInfo &SISDInfo,
  4018. SmallVectorImpl<Value *> &Ops,
  4019. const CallExpr *E) {
  4020. unsigned BuiltinID = SISDInfo.BuiltinID;
  4021. unsigned int Int = SISDInfo.LLVMIntrinsic;
  4022. unsigned Modifier = SISDInfo.TypeModifier;
  4023. const char *s = SISDInfo.NameHint;
  4024. switch (BuiltinID) {
  4025. case NEON::BI__builtin_neon_vcled_s64:
  4026. case NEON::BI__builtin_neon_vcled_u64:
  4027. case NEON::BI__builtin_neon_vcles_f32:
  4028. case NEON::BI__builtin_neon_vcled_f64:
  4029. case NEON::BI__builtin_neon_vcltd_s64:
  4030. case NEON::BI__builtin_neon_vcltd_u64:
  4031. case NEON::BI__builtin_neon_vclts_f32:
  4032. case NEON::BI__builtin_neon_vcltd_f64:
  4033. case NEON::BI__builtin_neon_vcales_f32:
  4034. case NEON::BI__builtin_neon_vcaled_f64:
  4035. case NEON::BI__builtin_neon_vcalts_f32:
  4036. case NEON::BI__builtin_neon_vcaltd_f64:
4037. // Only one direction of these comparisons actually exists: cmle is a cmge
4038. // with swapped operands. The table gives us the right intrinsic, but we
4039. // still need to do the swap.
  4040. std::swap(Ops[0], Ops[1]);
  4041. break;
  4042. }
  4043. assert(Int && "Generic code assumes a valid intrinsic");
  4044. // Determine the type(s) of this overloaded AArch64 intrinsic.
  4045. const Expr *Arg = E->getArg(0);
  4046. llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  4047. Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
  4048. int j = 0;
  4049. ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  4050. for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
  4051. ai != ae; ++ai, ++j) {
  4052. llvm::Type *ArgTy = ai->getType();
  4053. if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
  4054. ArgTy->getPrimitiveSizeInBits())
  4055. continue;
  4056. assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
  4057. // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
  4058. // it before inserting.
  4059. Ops[j] =
  4060. CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
  4061. Ops[j] =
  4062. CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
  4063. }
  4064. Value *Result = CGF.EmitNeonCall(F, Ops, s);
  4065. llvm::Type *ResultType = CGF.ConvertType(E->getType());
  4066. if (ResultType->getPrimitiveSizeInBits() <
  4067. Result->getType()->getPrimitiveSizeInBits())
  4068. return CGF.Builder.CreateExtractElement(Result, C0);
  4069. return CGF.Builder.CreateBitCast(Result, ResultType, s);
  4070. }
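// Table-driven emission for the overloaded "_v" Neon builtins. The last
// argument of each such builtin is a constant NeonTypeFlags value that
// selects the vector type; UnsignedAlts entries pick the alternative
// LLVM intrinsic when the element type is not unsigned.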
  4071. Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
  4072. unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
  4073. const char *NameHint, unsigned Modifier, const CallExpr *E,
  4074. SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
  4075. llvm::Triple::ArchType Arch) {
  4076. // Get the last argument, which specifies the vector type.
  4077. llvm::APSInt NeonTypeConst;
  4078. const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  4079. if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
  4080. return nullptr;
  4081. // Determine the type of this overloaded NEON intrinsic.
  4082. NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  4083. bool Usgn = Type.isUnsigned();
  4084. bool Quad = Type.isQuad();
  4085. const bool HasLegalHalfType = getTarget().hasLegalHalfType();
  4086. llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
  4087. llvm::Type *Ty = VTy;
  4088. if (!Ty)
  4089. return nullptr;
  4090. auto getAlignmentValue32 = [&](Address addr) -> Value* {
  4091. return Builder.getInt32(addr.getAlignment().getQuantity());
  4092. };
  4093. unsigned Int = LLVMIntrinsic;
  4094. if ((Modifier & UnsignedAlts) && !Usgn)
  4095. Int = AltLLVMIntrinsic;
  4096. switch (BuiltinID) {
  4097. default: break;
  4098. case NEON::BI__builtin_neon_vabs_v:
  4099. case NEON::BI__builtin_neon_vabsq_v:
  4100. if (VTy->getElementType()->isFloatingPointTy())
  4101. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
  4102. return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  4103. case NEON::BI__builtin_neon_vaddhn_v: {
  4104. llvm::VectorType *SrcTy =
  4105. llvm::VectorType::getExtendedElementVectorType(VTy);
  4106. // %sum = add <4 x i32> %lhs, %rhs
  4107. Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  4108. Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
  4109. Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
  4110. // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
  4111. Constant *ShiftAmt =
  4112. ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
  4113. Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
  4114. // %res = trunc <4 x i32> %high to <4 x i16>
  4115. return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  4116. }
  4117. case NEON::BI__builtin_neon_vcale_v:
  4118. case NEON::BI__builtin_neon_vcaleq_v:
  4119. case NEON::BI__builtin_neon_vcalt_v:
  4120. case NEON::BI__builtin_neon_vcaltq_v:
  4121. std::swap(Ops[0], Ops[1]);
  4122. LLVM_FALLTHROUGH;
  4123. case NEON::BI__builtin_neon_vcage_v:
  4124. case NEON::BI__builtin_neon_vcageq_v:
  4125. case NEON::BI__builtin_neon_vcagt_v:
  4126. case NEON::BI__builtin_neon_vcagtq_v: {
  4127. llvm::Type *Ty;
  4128. switch (VTy->getScalarSizeInBits()) {
  4129. default: llvm_unreachable("unexpected type");
  4130. case 32:
  4131. Ty = FloatTy;
  4132. break;
  4133. case 64:
  4134. Ty = DoubleTy;
  4135. break;
  4136. case 16:
  4137. Ty = HalfTy;
  4138. break;
  4139. }
  4140. llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
  4141. llvm::Type *Tys[] = { VTy, VecFlt };
  4142. Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  4143. return EmitNeonCall(F, Ops, NameHint);
  4144. }
  4145. case NEON::BI__builtin_neon_vceqz_v:
  4146. case NEON::BI__builtin_neon_vceqzq_v:
  4147. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
  4148. ICmpInst::ICMP_EQ, "vceqz");
  4149. case NEON::BI__builtin_neon_vcgez_v:
  4150. case NEON::BI__builtin_neon_vcgezq_v:
  4151. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
  4152. ICmpInst::ICMP_SGE, "vcgez");
  4153. case NEON::BI__builtin_neon_vclez_v:
  4154. case NEON::BI__builtin_neon_vclezq_v:
  4155. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
  4156. ICmpInst::ICMP_SLE, "vclez");
  4157. case NEON::BI__builtin_neon_vcgtz_v:
  4158. case NEON::BI__builtin_neon_vcgtzq_v:
  4159. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
  4160. ICmpInst::ICMP_SGT, "vcgtz");
  4161. case NEON::BI__builtin_neon_vcltz_v:
  4162. case NEON::BI__builtin_neon_vcltzq_v:
  4163. return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
  4164. ICmpInst::ICMP_SLT, "vcltz");
  4165. case NEON::BI__builtin_neon_vclz_v:
  4166. case NEON::BI__builtin_neon_vclzq_v:
4167. // We generate a target-independent intrinsic, which needs a second argument
4168. // saying whether clz of zero is undefined; on ARM it isn't.
  4169. Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
  4170. break;
  4171. case NEON::BI__builtin_neon_vcvt_f32_v:
  4172. case NEON::BI__builtin_neon_vcvtq_f32_v:
  4173. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  4174. Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
  4175. HasLegalHalfType);
  4176. return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
  4177. : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  4178. case NEON::BI__builtin_neon_vcvt_f16_v:
  4179. case NEON::BI__builtin_neon_vcvtq_f16_v:
  4180. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  4181. Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
  4182. HasLegalHalfType);
  4183. return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
  4184. : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  4185. case NEON::BI__builtin_neon_vcvt_n_f16_v:
  4186. case NEON::BI__builtin_neon_vcvt_n_f32_v:
  4187. case NEON::BI__builtin_neon_vcvt_n_f64_v:
  4188. case NEON::BI__builtin_neon_vcvtq_n_f16_v:
  4189. case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  4190. case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
  4191. llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
  4192. Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
  4193. Function *F = CGM.getIntrinsic(Int, Tys);
  4194. return EmitNeonCall(F, Ops, "vcvt_n");
  4195. }
  4196. case NEON::BI__builtin_neon_vcvt_n_s16_v:
  4197. case NEON::BI__builtin_neon_vcvt_n_s32_v:
  4198. case NEON::BI__builtin_neon_vcvt_n_u16_v:
  4199. case NEON::BI__builtin_neon_vcvt_n_u32_v:
  4200. case NEON::BI__builtin_neon_vcvt_n_s64_v:
  4201. case NEON::BI__builtin_neon_vcvt_n_u64_v:
  4202. case NEON::BI__builtin_neon_vcvtq_n_s16_v:
  4203. case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  4204. case NEON::BI__builtin_neon_vcvtq_n_u16_v:
  4205. case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  4206. case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  4207. case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
  4208. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  4209. Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  4210. return EmitNeonCall(F, Ops, "vcvt_n");
  4211. }
  4212. case NEON::BI__builtin_neon_vcvt_s32_v:
  4213. case NEON::BI__builtin_neon_vcvt_u32_v:
  4214. case NEON::BI__builtin_neon_vcvt_s64_v:
  4215. case NEON::BI__builtin_neon_vcvt_u64_v:
  4216. case NEON::BI__builtin_neon_vcvt_s16_v:
  4217. case NEON::BI__builtin_neon_vcvt_u16_v:
  4218. case NEON::BI__builtin_neon_vcvtq_s32_v:
  4219. case NEON::BI__builtin_neon_vcvtq_u32_v:
  4220. case NEON::BI__builtin_neon_vcvtq_s64_v:
  4221. case NEON::BI__builtin_neon_vcvtq_u64_v:
  4222. case NEON::BI__builtin_neon_vcvtq_s16_v:
  4223. case NEON::BI__builtin_neon_vcvtq_u16_v: {
  4224. Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
  4225. return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
  4226. : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  4227. }
  4228. case NEON::BI__builtin_neon_vcvta_s16_v:
  4229. case NEON::BI__builtin_neon_vcvta_s32_v:
  4230. case NEON::BI__builtin_neon_vcvta_s64_v:
  4231. case NEON::BI__builtin_neon_vcvta_u32_v:
  4232. case NEON::BI__builtin_neon_vcvta_u64_v:
  4233. case NEON::BI__builtin_neon_vcvtaq_s16_v:
  4234. case NEON::BI__builtin_neon_vcvtaq_s32_v:
  4235. case NEON::BI__builtin_neon_vcvtaq_s64_v:
  4236. case NEON::BI__builtin_neon_vcvtaq_u16_v:
  4237. case NEON::BI__builtin_neon_vcvtaq_u32_v:
  4238. case NEON::BI__builtin_neon_vcvtaq_u64_v:
  4239. case NEON::BI__builtin_neon_vcvtn_s16_v:
  4240. case NEON::BI__builtin_neon_vcvtn_s32_v:
  4241. case NEON::BI__builtin_neon_vcvtn_s64_v:
  4242. case NEON::BI__builtin_neon_vcvtn_u16_v:
  4243. case NEON::BI__builtin_neon_vcvtn_u32_v:
  4244. case NEON::BI__builtin_neon_vcvtn_u64_v:
  4245. case NEON::BI__builtin_neon_vcvtnq_s16_v:
  4246. case NEON::BI__builtin_neon_vcvtnq_s32_v:
  4247. case NEON::BI__builtin_neon_vcvtnq_s64_v:
  4248. case NEON::BI__builtin_neon_vcvtnq_u16_v:
  4249. case NEON::BI__builtin_neon_vcvtnq_u32_v:
  4250. case NEON::BI__builtin_neon_vcvtnq_u64_v:
  4251. case NEON::BI__builtin_neon_vcvtp_s16_v:
  4252. case NEON::BI__builtin_neon_vcvtp_s32_v:
  4253. case NEON::BI__builtin_neon_vcvtp_s64_v:
  4254. case NEON::BI__builtin_neon_vcvtp_u16_v:
  4255. case NEON::BI__builtin_neon_vcvtp_u32_v:
  4256. case NEON::BI__builtin_neon_vcvtp_u64_v:
  4257. case NEON::BI__builtin_neon_vcvtpq_s16_v:
  4258. case NEON::BI__builtin_neon_vcvtpq_s32_v:
  4259. case NEON::BI__builtin_neon_vcvtpq_s64_v:
  4260. case NEON::BI__builtin_neon_vcvtpq_u16_v:
  4261. case NEON::BI__builtin_neon_vcvtpq_u32_v:
  4262. case NEON::BI__builtin_neon_vcvtpq_u64_v:
  4263. case NEON::BI__builtin_neon_vcvtm_s16_v:
  4264. case NEON::BI__builtin_neon_vcvtm_s32_v:
  4265. case NEON::BI__builtin_neon_vcvtm_s64_v:
  4266. case NEON::BI__builtin_neon_vcvtm_u16_v:
  4267. case NEON::BI__builtin_neon_vcvtm_u32_v:
  4268. case NEON::BI__builtin_neon_vcvtm_u64_v:
  4269. case NEON::BI__builtin_neon_vcvtmq_s16_v:
  4270. case NEON::BI__builtin_neon_vcvtmq_s32_v:
  4271. case NEON::BI__builtin_neon_vcvtmq_s64_v:
  4272. case NEON::BI__builtin_neon_vcvtmq_u16_v:
  4273. case NEON::BI__builtin_neon_vcvtmq_u32_v:
  4274. case NEON::BI__builtin_neon_vcvtmq_u64_v: {
  4275. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  4276. return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  4277. }
  4278. case NEON::BI__builtin_neon_vext_v:
  4279. case NEON::BI__builtin_neon_vextq_v: {
  4280. int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
  4281. SmallVector<uint32_t, 16> Indices;
  4282. for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
  4283. Indices.push_back(i+CV);
  4284. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  4285. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  4286. return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  4287. }
  4288. case NEON::BI__builtin_neon_vfma_v:
  4289. case NEON::BI__builtin_neon_vfmaq_v: {
  4290. Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
  4291. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  4292. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  4293. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4294. // The NEON intrinsic puts the accumulator first, unlike the LLVM fma.
  4295. return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  4296. }
  4297. case NEON::BI__builtin_neon_vld1_v:
  4298. case NEON::BI__builtin_neon_vld1q_v: {
  4299. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  4300. Ops.push_back(getAlignmentValue32(PtrOp0));
  4301. return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  4302. }
  4303. case NEON::BI__builtin_neon_vld2_v:
  4304. case NEON::BI__builtin_neon_vld2q_v:
  4305. case NEON::BI__builtin_neon_vld3_v:
  4306. case NEON::BI__builtin_neon_vld3q_v:
  4307. case NEON::BI__builtin_neon_vld4_v:
  4308. case NEON::BI__builtin_neon_vld4q_v: {
  4309. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  4310. Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  4311. Value *Align = getAlignmentValue32(PtrOp1);
  4312. Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
  4313. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  4314. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  4315. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  4316. }
  4317. case NEON::BI__builtin_neon_vld1_dup_v:
  4318. case NEON::BI__builtin_neon_vld1q_dup_v: {
  4319. Value *V = UndefValue::get(Ty);
  4320. Ty = llvm::PointerType::getUnqual(VTy->getElementType());
  4321. PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
  4322. LoadInst *Ld = Builder.CreateLoad(PtrOp0);
  4323. llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  4324. Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
  4325. return EmitNeonSplat(Ops[0], CI);
  4326. }
  4327. case NEON::BI__builtin_neon_vld2_lane_v:
  4328. case NEON::BI__builtin_neon_vld2q_lane_v:
  4329. case NEON::BI__builtin_neon_vld3_lane_v:
  4330. case NEON::BI__builtin_neon_vld3q_lane_v:
  4331. case NEON::BI__builtin_neon_vld4_lane_v:
  4332. case NEON::BI__builtin_neon_vld4q_lane_v: {
  4333. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  4334. Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
  4335. for (unsigned I = 2; I < Ops.size() - 1; ++I)
  4336. Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
  4337. Ops.push_back(getAlignmentValue32(PtrOp1));
  4338. Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
  4339. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  4340. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  4341. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  4342. }
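// vmovl/vmovn need no intrinsic: they are plain sign/zero extensions and
// truncations between a vector type and its narrowed/widened counterpart.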
  4343. case NEON::BI__builtin_neon_vmovl_v: {
  4344. llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
  4345. Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
  4346. if (Usgn)
  4347. return Builder.CreateZExt(Ops[0], Ty, "vmovl");
  4348. return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  4349. }
  4350. case NEON::BI__builtin_neon_vmovn_v: {
  4351. llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
  4352. Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
  4353. return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  4354. }
  4355. case NEON::BI__builtin_neon_vmull_v:
  4356. // FIXME: the integer vmull operations could be emitted in terms of pure
  4357. // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
  4358. // hoisting the exts outside loops. Until global ISel comes along that can
4359. // see through such movement, this leads to bad CodeGen. So we need an
  4360. // intrinsic for now.
  4361. Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
  4362. Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
  4363. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  4364. case NEON::BI__builtin_neon_vpadal_v:
  4365. case NEON::BI__builtin_neon_vpadalq_v: {
  4366. // The source operand type has twice as many elements of half the size.
  4367. unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
  4368. llvm::Type *EltTy =
  4369. llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
  4370. llvm::Type *NarrowTy =
  4371. llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
  4372. llvm::Type *Tys[2] = { Ty, NarrowTy };
  4373. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  4374. }
  4375. case NEON::BI__builtin_neon_vpaddl_v:
  4376. case NEON::BI__builtin_neon_vpaddlq_v: {
  4377. // The source operand type has twice as many elements of half the size.
  4378. unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
  4379. llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
  4380. llvm::Type *NarrowTy =
  4381. llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
  4382. llvm::Type *Tys[2] = { Ty, NarrowTy };
  4383. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  4384. }
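// vqdmlal/vqdmlsl are emitted as a saturating doubling multiply-long
// (LLVMIntrinsic) on the last two operands, followed by a saturating
// add/sub (AltLLVMIntrinsic) with the accumulator.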
  4385. case NEON::BI__builtin_neon_vqdmlal_v:
  4386. case NEON::BI__builtin_neon_vqdmlsl_v: {
  4387. SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
  4388. Ops[1] =
  4389. EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
  4390. Ops.resize(2);
  4391. return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  4392. }
  4393. case NEON::BI__builtin_neon_vqshl_n_v:
  4394. case NEON::BI__builtin_neon_vqshlq_n_v:
  4395. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
  4396. 1, false);
  4397. case NEON::BI__builtin_neon_vqshlu_n_v:
  4398. case NEON::BI__builtin_neon_vqshluq_n_v:
  4399. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
  4400. 1, false);
  4401. case NEON::BI__builtin_neon_vrecpe_v:
  4402. case NEON::BI__builtin_neon_vrecpeq_v:
  4403. case NEON::BI__builtin_neon_vrsqrte_v:
  4404. case NEON::BI__builtin_neon_vrsqrteq_v:
  4405. Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
  4406. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
  4407. case NEON::BI__builtin_neon_vrshr_n_v:
  4408. case NEON::BI__builtin_neon_vrshrq_n_v:
  4409. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
  4410. 1, true);
  4411. case NEON::BI__builtin_neon_vshl_n_v:
  4412. case NEON::BI__builtin_neon_vshlq_n_v:
  4413. Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
  4414. return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
  4415. "vshl_n");
  4416. case NEON::BI__builtin_neon_vshll_n_v: {
  4417. llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
  4418. Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  4419. if (Usgn)
  4420. Ops[0] = Builder.CreateZExt(Ops[0], VTy);
  4421. else
  4422. Ops[0] = Builder.CreateSExt(Ops[0], VTy);
  4423. Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
  4424. return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  4425. }
  4426. case NEON::BI__builtin_neon_vshrn_n_v: {
  4427. llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
  4428. Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  4429. Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
  4430. if (Usgn)
  4431. Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
  4432. else
  4433. Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
  4434. return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  4435. }
  4436. case NEON::BI__builtin_neon_vshr_n_v:
  4437. case NEON::BI__builtin_neon_vshrq_n_v:
  4438. return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  4439. case NEON::BI__builtin_neon_vst1_v:
  4440. case NEON::BI__builtin_neon_vst1q_v:
  4441. case NEON::BI__builtin_neon_vst2_v:
  4442. case NEON::BI__builtin_neon_vst2q_v:
  4443. case NEON::BI__builtin_neon_vst3_v:
  4444. case NEON::BI__builtin_neon_vst3q_v:
  4445. case NEON::BI__builtin_neon_vst4_v:
  4446. case NEON::BI__builtin_neon_vst4q_v:
  4447. case NEON::BI__builtin_neon_vst2_lane_v:
  4448. case NEON::BI__builtin_neon_vst2q_lane_v:
  4449. case NEON::BI__builtin_neon_vst3_lane_v:
  4450. case NEON::BI__builtin_neon_vst3q_lane_v:
  4451. case NEON::BI__builtin_neon_vst4_lane_v:
  4452. case NEON::BI__builtin_neon_vst4q_lane_v: {
  4453. llvm::Type *Tys[] = {Int8PtrTy, Ty};
  4454. Ops.push_back(getAlignmentValue32(PtrOp0));
  4455. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  4456. }
  4457. case NEON::BI__builtin_neon_vsubhn_v: {
  4458. llvm::VectorType *SrcTy =
  4459. llvm::VectorType::getExtendedElementVectorType(VTy);
4460. // %diff = sub <4 x i32> %lhs, %rhs
  4461. Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
  4462. Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
  4463. Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4464. // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
  4465. Constant *ShiftAmt =
  4466. ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
  4467. Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
  4468. // %res = trunc <4 x i32> %high to <4 x i16>
  4469. return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  4470. }
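// vtrn/vuzp/vzip return two vectors through the pointer in Ops[0]: each half
// of the result is built with a shufflevector and stored to consecutive
// slots of that destination.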
  4471. case NEON::BI__builtin_neon_vtrn_v:
  4472. case NEON::BI__builtin_neon_vtrnq_v: {
  4473. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  4474. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  4475. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  4476. Value *SV = nullptr;
  4477. for (unsigned vi = 0; vi != 2; ++vi) {
  4478. SmallVector<uint32_t, 16> Indices;
  4479. for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
  4480. Indices.push_back(i+vi);
  4481. Indices.push_back(i+e+vi);
  4482. }
  4483. Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
  4484. SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
  4485. SV = Builder.CreateDefaultAlignedStore(SV, Addr);
  4486. }
  4487. return SV;
  4488. }
  4489. case NEON::BI__builtin_neon_vtst_v:
  4490. case NEON::BI__builtin_neon_vtstq_v: {
  4491. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  4492. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  4493. Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
  4494. Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
  4495. ConstantAggregateZero::get(Ty));
  4496. return Builder.CreateSExt(Ops[0], Ty, "vtst");
  4497. }
  4498. case NEON::BI__builtin_neon_vuzp_v:
  4499. case NEON::BI__builtin_neon_vuzpq_v: {
  4500. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  4501. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  4502. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  4503. Value *SV = nullptr;
  4504. for (unsigned vi = 0; vi != 2; ++vi) {
  4505. SmallVector<uint32_t, 16> Indices;
  4506. for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
  4507. Indices.push_back(2*i+vi);
  4508. Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
  4509. SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
  4510. SV = Builder.CreateDefaultAlignedStore(SV, Addr);
  4511. }
  4512. return SV;
  4513. }
  4514. case NEON::BI__builtin_neon_vzip_v:
  4515. case NEON::BI__builtin_neon_vzipq_v: {
  4516. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
  4517. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  4518. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  4519. Value *SV = nullptr;
  4520. for (unsigned vi = 0; vi != 2; ++vi) {
  4521. SmallVector<uint32_t, 16> Indices;
  4522. for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
  4523. Indices.push_back((i + vi*e) >> 1);
  4524. Indices.push_back(((i + vi*e) >> 1)+e);
  4525. }
  4526. Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
  4527. SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
  4528. SV = Builder.CreateDefaultAlignedStore(SV, Addr);
  4529. }
  4530. return SV;
  4531. }
  4532. case NEON::BI__builtin_neon_vdot_v:
  4533. case NEON::BI__builtin_neon_vdotq_v: {
  4534. llvm::Type *InputTy =
  4535. llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
  4536. llvm::Type *Tys[2] = { Ty, InputTy };
  4537. Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
  4538. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
  4539. }
  4540. }
  4541. assert(Int && "Expected valid intrinsic number");
  4542. // Determine the type(s) of this overloaded AArch64 intrinsic.
  4543. Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
  4544. Value *Result = EmitNeonCall(F, Ops, NameHint);
  4545. llvm::Type *ResultType = ConvertType(E->getType());
4546. // Cast the one-element vector produced by the AArch64 intrinsic back to
4547. // the scalar type expected by the builtin.
  4548. return Builder.CreateBitCast(Result, ResultType, NameHint);
  4549. }
  4550. Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
  4551. Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
  4552. const CmpInst::Predicate Ip, const Twine &Name) {
  4553. llvm::Type *OTy = Op->getType();
  4554. // FIXME: this is utterly horrific. We should not be looking at previous
  4555. // codegen context to find out what needs doing. Unfortunately TableGen
  4556. // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
  4557. // (etc).
  4558. if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
  4559. OTy = BI->getOperand(0)->getType();
  4560. Op = Builder.CreateBitCast(Op, OTy);
  4561. if (OTy->getScalarType()->isFloatingPointTy()) {
  4562. Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
  4563. } else {
  4564. Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
  4565. }
  4566. return Builder.CreateSExt(Op, Ty, Name);
  4567. }
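// Pack a list of 64-bit table operands for an AArch64 table-lookup
// intrinsic: adjacent d-register operands are concatenated into 128-bit
// vectors with shufflevector, an odd trailing operand is padded out with
// zeroes, and the index operand is appended last (ExtOp, if present, is
// passed through as the leading operand).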
  4568. static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
  4569. Value *ExtOp, Value *IndexOp,
  4570. llvm::Type *ResTy, unsigned IntID,
  4571. const char *Name) {
  4572. SmallVector<Value *, 2> TblOps;
  4573. if (ExtOp)
  4574. TblOps.push_back(ExtOp);
4575. // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
  4576. SmallVector<uint32_t, 16> Indices;
  4577. llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
  4578. for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
  4579. Indices.push_back(2*i);
  4580. Indices.push_back(2*i+1);
  4581. }
  4582. int PairPos = 0, End = Ops.size() - 1;
  4583. while (PairPos < End) {
  4584. TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
  4585. Ops[PairPos+1], Indices,
  4586. Name));
  4587. PairPos += 2;
  4588. }
4589. // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4590. // of the last 128-bit lookup table with zeroes.
  4591. if (PairPos == End) {
  4592. Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
  4593. TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
  4594. ZeroTbl, Indices, Name));
  4595. }
  4596. Function *TblF;
  4597. TblOps.push_back(IndexOp);
  4598. TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
  4599. return CGF.EmitNeonCall(TblF, TblOps, Name);
  4600. }
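// Map the ARM hint builtins (nop/yield/wfe/wfi/sev/sevl) onto the
// llvm.arm.hint intrinsic with the corresponding immediate (0..5), or
// return nullptr if the builtin is not a hint.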
  4601. Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
  4602. unsigned Value;
  4603. switch (BuiltinID) {
  4604. default:
  4605. return nullptr;
  4606. case ARM::BI__builtin_arm_nop:
  4607. Value = 0;
  4608. break;
  4609. case ARM::BI__builtin_arm_yield:
  4610. case ARM::BI__yield:
  4611. Value = 1;
  4612. break;
  4613. case ARM::BI__builtin_arm_wfe:
  4614. case ARM::BI__wfe:
  4615. Value = 2;
  4616. break;
  4617. case ARM::BI__builtin_arm_wfi:
  4618. case ARM::BI__wfi:
  4619. Value = 3;
  4620. break;
  4621. case ARM::BI__builtin_arm_sev:
  4622. case ARM::BI__sev:
  4623. Value = 4;
  4624. break;
  4625. case ARM::BI__builtin_arm_sevl:
  4626. case ARM::BI__sevl:
  4627. Value = 5;
  4628. break;
  4629. }
  4630. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
  4631. llvm::ConstantInt::get(Int32Ty, Value));
  4632. }
4633. // Generates the IR for a read/write special register builtin.
4634. // ValueType is the type of the value that is to be written or read;
4635. // RegisterType is the type of the register being written to or read from.
  4636. static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
  4637. const CallExpr *E,
  4638. llvm::Type *RegisterType,
  4639. llvm::Type *ValueType,
  4640. bool IsRead,
  4641. StringRef SysReg = "") {
4642. // The read and write register intrinsics only support 32- and 64-bit operations.
  4643. assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
  4644. && "Unsupported size for register.");
  4645. CodeGen::CGBuilderTy &Builder = CGF.Builder;
  4646. CodeGen::CodeGenModule &CGM = CGF.CGM;
  4647. LLVMContext &Context = CGM.getLLVMContext();
  4648. if (SysReg.empty()) {
  4649. const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
  4650. SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
  4651. }
  4652. llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
  4653. llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
  4654. llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
  4655. llvm::Type *Types[] = { RegisterType };
  4656. bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
  4657. assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
  4658. && "Can't fit 64-bit value in 32-bit register");
  4659. if (IsRead) {
  4660. llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
  4661. llvm::Value *Call = Builder.CreateCall(F, Metadata);
  4662. if (MixedTypes)
  4663. // Read into 64 bit register and then truncate result to 32 bit.
  4664. return Builder.CreateTrunc(Call, ValueType);
  4665. if (ValueType->isPointerTy())
  4666. // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
  4667. return Builder.CreateIntToPtr(Call, ValueType);
  4668. return Call;
  4669. }
  4670. llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
  4671. llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
  4672. if (MixedTypes) {
  4673. // Extend 32 bit write value to 64 bit to pass to write.
  4674. ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
  4675. return Builder.CreateCall(F, { Metadata, ArgValue });
  4676. }
  4677. if (ValueType->isPointerTy()) {
4678. // Have a VoidPtrTy ArgValue but the write intrinsic takes an i32/i64.
  4679. ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
  4680. return Builder.CreateCall(F, { Metadata, ArgValue });
  4681. }
  4682. return Builder.CreateCall(F, { Metadata, ArgValue });
  4683. }
  4684. /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
  4685. /// argument that specifies the vector type.
  4686. static bool HasExtraNeonArgument(unsigned BuiltinID) {
  4687. switch (BuiltinID) {
  4688. default: break;
  4689. case NEON::BI__builtin_neon_vget_lane_i8:
  4690. case NEON::BI__builtin_neon_vget_lane_i16:
  4691. case NEON::BI__builtin_neon_vget_lane_i32:
  4692. case NEON::BI__builtin_neon_vget_lane_i64:
  4693. case NEON::BI__builtin_neon_vget_lane_f32:
  4694. case NEON::BI__builtin_neon_vgetq_lane_i8:
  4695. case NEON::BI__builtin_neon_vgetq_lane_i16:
  4696. case NEON::BI__builtin_neon_vgetq_lane_i32:
  4697. case NEON::BI__builtin_neon_vgetq_lane_i64:
  4698. case NEON::BI__builtin_neon_vgetq_lane_f32:
  4699. case NEON::BI__builtin_neon_vset_lane_i8:
  4700. case NEON::BI__builtin_neon_vset_lane_i16:
  4701. case NEON::BI__builtin_neon_vset_lane_i32:
  4702. case NEON::BI__builtin_neon_vset_lane_i64:
  4703. case NEON::BI__builtin_neon_vset_lane_f32:
  4704. case NEON::BI__builtin_neon_vsetq_lane_i8:
  4705. case NEON::BI__builtin_neon_vsetq_lane_i16:
  4706. case NEON::BI__builtin_neon_vsetq_lane_i32:
  4707. case NEON::BI__builtin_neon_vsetq_lane_i64:
  4708. case NEON::BI__builtin_neon_vsetq_lane_f32:
  4709. case NEON::BI__builtin_neon_vsha1h_u32:
  4710. case NEON::BI__builtin_neon_vsha1cq_u32:
  4711. case NEON::BI__builtin_neon_vsha1pq_u32:
  4712. case NEON::BI__builtin_neon_vsha1mq_u32:
  4713. case clang::ARM::BI_MoveToCoprocessor:
  4714. case clang::ARM::BI_MoveToCoprocessor2:
  4715. return false;
  4716. }
  4717. return true;
  4718. }
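// Emit the AArch32 builtins: hint builtins are handled first, followed by
// the ARM-specific system, coprocessor and exclusive-access builtins below.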
  4719. Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
  4720. const CallExpr *E,
  4721. llvm::Triple::ArchType Arch) {
  4722. if (auto Hint = GetValueForARMHint(BuiltinID))
  4723. return Hint;
  4724. if (BuiltinID == ARM::BI__emit) {
  4725. bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
  4726. llvm::FunctionType *FTy =
  4727. llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
  4728. APSInt Value;
  4729. if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
  4730. llvm_unreachable("Sema will ensure that the parameter is constant");
  4731. uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
  4732. llvm::InlineAsm *Emit =
  4733. IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
  4734. /*SideEffects=*/true)
  4735. : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
  4736. /*SideEffects=*/true);
  4737. return Builder.CreateCall(Emit);
  4738. }
  4739. if (BuiltinID == ARM::BI__builtin_arm_dbg) {
  4740. Value *Option = EmitScalarExpr(E->getArg(0));
  4741. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  4742. }
  4743. if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
  4744. Value *Address = EmitScalarExpr(E->getArg(0));
  4745. Value *RW = EmitScalarExpr(E->getArg(1));
  4746. Value *IsData = EmitScalarExpr(E->getArg(2));
4747. // Locality is not supported on the ARM target.
  4748. Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
  4749. Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
  4750. return Builder.CreateCall(F, {Address, RW, Locality, IsData});
  4751. }
  4752. if (BuiltinID == ARM::BI__builtin_arm_rbit) {
  4753. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  4754. return Builder.CreateCall(
  4755. CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  4756. }
  4757. if (BuiltinID == ARM::BI__clear_cache) {
  4758. assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
  4759. const FunctionDecl *FD = E->getDirectCallee();
  4760. Value *Ops[2];
  4761. for (unsigned i = 0; i < 2; i++)
  4762. Ops[i] = EmitScalarExpr(E->getArg(i));
  4763. llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
  4764. llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
  4765. StringRef Name = FD->getName();
  4766. return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  4767. }
  4768. if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
  4769. BuiltinID == ARM::BI__builtin_arm_mcrr2) {
  4770. Function *F;
  4771. switch (BuiltinID) {
  4772. default: llvm_unreachable("unexpected builtin");
  4773. case ARM::BI__builtin_arm_mcrr:
  4774. F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
  4775. break;
  4776. case ARM::BI__builtin_arm_mcrr2:
  4777. F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
  4778. break;
  4779. }
4780. // The MCRR{2} instruction has 5 operands, but the builtin has only 4
4781. // because Rt and Rt2 are represented as a single unsigned 64-bit
4782. // integer in the builtin; it is split back into two 32-bit values
4783. // here before calling the intrinsic.
  4786. Value *Coproc = EmitScalarExpr(E->getArg(0));
  4787. Value *Opc1 = EmitScalarExpr(E->getArg(1));
  4788. Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
  4789. Value *CRm = EmitScalarExpr(E->getArg(3));
  4790. Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
  4791. Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
  4792. Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
  4793. Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
  4794. return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
  4795. }
  4796. if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
  4797. BuiltinID == ARM::BI__builtin_arm_mrrc2) {
  4798. Function *F;
  4799. switch (BuiltinID) {
  4800. default: llvm_unreachable("unexpected builtin");
  4801. case ARM::BI__builtin_arm_mrrc:
  4802. F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
  4803. break;
  4804. case ARM::BI__builtin_arm_mrrc2:
  4805. F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
  4806. break;
  4807. }
  4808. Value *Coproc = EmitScalarExpr(E->getArg(0));
  4809. Value *Opc1 = EmitScalarExpr(E->getArg(1));
  4810. Value *CRm = EmitScalarExpr(E->getArg(2));
  4811. Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
  4812. // Returns an unsigned 64 bit integer, represented
  4813. // as two 32 bit integers.
  4814. Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
  4815. Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
  4816. Rt = Builder.CreateZExt(Rt, Int64Ty);
  4817. Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
  4818. Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
  4819. RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
  4820. RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
  4821. return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
  4822. }
  4823. if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
  4824. ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
  4825. BuiltinID == ARM::BI__builtin_arm_ldaex) &&
  4826. getContext().getTypeSize(E->getType()) == 64) ||
  4827. BuiltinID == ARM::BI__ldrexd) {
  4828. Function *F;
  4829. switch (BuiltinID) {
  4830. default: llvm_unreachable("unexpected builtin");
  4831. case ARM::BI__builtin_arm_ldaex:
  4832. F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
  4833. break;
  4834. case ARM::BI__builtin_arm_ldrexd:
  4835. case ARM::BI__builtin_arm_ldrex:
  4836. case ARM::BI__ldrexd:
  4837. F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
  4838. break;
  4839. }
  4840. Value *LdPtr = EmitScalarExpr(E->getArg(0));
  4841. Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
  4842. "ldrexd");
  4843. Value *Val0 = Builder.CreateExtractValue(Val, 1);
  4844. Value *Val1 = Builder.CreateExtractValue(Val, 0);
  4845. Val0 = Builder.CreateZExt(Val0, Int64Ty);
  4846. Val1 = Builder.CreateZExt(Val1, Int64Ty);
  4847. Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
  4848. Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
  4849. Val = Builder.CreateOr(Val, Val1);
  4850. return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  4851. }
  4852. if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
  4853. BuiltinID == ARM::BI__builtin_arm_ldaex) {
  4854. Value *LoadAddr = EmitScalarExpr(E->getArg(0));
  4855. QualType Ty = E->getType();
  4856. llvm::Type *RealResTy = ConvertType(Ty);
  4857. llvm::Type *PtrTy = llvm::IntegerType::get(
  4858. getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
  4859. LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
  4860. Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
  4861. ? Intrinsic::arm_ldaex
  4862. : Intrinsic::arm_ldrex,
  4863. PtrTy);
  4864. Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
  4865. if (RealResTy->isPointerTy())
  4866. return Builder.CreateIntToPtr(Val, RealResTy);
  4867. else {
  4868. llvm::Type *IntResTy = llvm::IntegerType::get(
  4869. getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
  4870. Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
  4871. return Builder.CreateBitCast(Val, RealResTy);
  4872. }
  4873. }
  4874. if (BuiltinID == ARM::BI__builtin_arm_strexd ||
  4875. ((BuiltinID == ARM::BI__builtin_arm_stlex ||
  4876. BuiltinID == ARM::BI__builtin_arm_strex) &&
  4877. getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
  4878. Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
  4879. ? Intrinsic::arm_stlexd
  4880. : Intrinsic::arm_strexd);
  4881. llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
  4882. Address Tmp = CreateMemTemp(E->getArg(0)->getType());
  4883. Value *Val = EmitScalarExpr(E->getArg(0));
  4884. Builder.CreateStore(Val, Tmp);
  4885. Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
  4886. Val = Builder.CreateLoad(LdPtr);
  4887. Value *Arg0 = Builder.CreateExtractValue(Val, 0);
  4888. Value *Arg1 = Builder.CreateExtractValue(Val, 1);
  4889. Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
  4890. return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
  4891. }
  4892. if (BuiltinID == ARM::BI__builtin_arm_strex ||
  4893. BuiltinID == ARM::BI__builtin_arm_stlex) {
  4894. Value *StoreVal = EmitScalarExpr(E->getArg(0));
  4895. Value *StoreAddr = EmitScalarExpr(E->getArg(1));
  4896. QualType Ty = E->getArg(0)->getType();
  4897. llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
  4898. getContext().getTypeSize(Ty));
  4899. StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
  4900. if (StoreVal->getType()->isPointerTy())
  4901. StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
  4902. else {
  4903. llvm::Type *IntTy = llvm::IntegerType::get(
  4904. getLLVMContext(),
  4905. CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
  4906. StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
  4907. StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
  4908. }
  4909. Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
  4910. ? Intrinsic::arm_stlex
  4911. : Intrinsic::arm_strex,
  4912. StoreAddr->getType());
  4913. return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
  4914. }
  4915. switch (BuiltinID) {
  4916. case ARM::BI__iso_volatile_load8:
  4917. case ARM::BI__iso_volatile_load16:
  4918. case ARM::BI__iso_volatile_load32:
  4919. case ARM::BI__iso_volatile_load64: {
  4920. Value *Ptr = EmitScalarExpr(E->getArg(0));
  4921. QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  4922. CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
  4923. llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
  4924. LoadSize.getQuantity() * 8);
  4925. Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  4926. llvm::LoadInst *Load =
  4927. Builder.CreateAlignedLoad(Ptr, LoadSize);
  4928. Load->setVolatile(true);
  4929. return Load;
  4930. }
  4931. case ARM::BI__iso_volatile_store8:
  4932. case ARM::BI__iso_volatile_store16:
  4933. case ARM::BI__iso_volatile_store32:
  4934. case ARM::BI__iso_volatile_store64: {
  4935. Value *Ptr = EmitScalarExpr(E->getArg(0));
  4936. Value *Value = EmitScalarExpr(E->getArg(1));
  4937. QualType ElTy = E->getArg(0)->getType()->getPointeeType();
  4938. CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
  4939. llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
  4940. StoreSize.getQuantity() * 8);
  4941. Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
  4942. llvm::StoreInst *Store =
  4943. Builder.CreateAlignedStore(Value, Ptr,
  4944. StoreSize);
  4945. Store->setVolatile(true);
  4946. return Store;
  4947. }
  4948. }
  4949. if (BuiltinID == ARM::BI__builtin_arm_clrex) {
  4950. Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
  4951. return Builder.CreateCall(F);
  4952. }
  4953. // CRC32
  4954. Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  4955. switch (BuiltinID) {
  4956. case ARM::BI__builtin_arm_crc32b:
  4957. CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  4958. case ARM::BI__builtin_arm_crc32cb:
  4959. CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  4960. case ARM::BI__builtin_arm_crc32h:
  4961. CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  4962. case ARM::BI__builtin_arm_crc32ch:
  4963. CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  4964. case ARM::BI__builtin_arm_crc32w:
  4965. case ARM::BI__builtin_arm_crc32d:
  4966. CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  4967. case ARM::BI__builtin_arm_crc32cw:
  4968. case ARM::BI__builtin_arm_crc32cd:
  4969. CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  4970. }
  4971. if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
  4972. Value *Arg0 = EmitScalarExpr(E->getArg(0));
  4973. Value *Arg1 = EmitScalarExpr(E->getArg(1));
  4974. // crc32{c,}d intrinsics are implemnted as two calls to crc32{c,}w
  4975. // intrinsics, hence we need different codegen for these cases.
  4976. if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
  4977. BuiltinID == ARM::BI__builtin_arm_crc32cd) {
  4978. Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
  4979. Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
  4980. Value *Arg1b = Builder.CreateLShr(Arg1, C1);
  4981. Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
  4982. Function *F = CGM.getIntrinsic(CRCIntrinsicID);
  4983. Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
  4984. return Builder.CreateCall(F, {Res, Arg1b});
  4985. } else {
  4986. Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
  4987. Function *F = CGM.getIntrinsic(CRCIntrinsicID);
  4988. return Builder.CreateCall(F, {Arg0, Arg1});
  4989. }
  4990. }
  4991. if (BuiltinID == ARM::BI__builtin_arm_rsr ||
  4992. BuiltinID == ARM::BI__builtin_arm_rsr64 ||
  4993. BuiltinID == ARM::BI__builtin_arm_rsrp ||
  4994. BuiltinID == ARM::BI__builtin_arm_wsr ||
  4995. BuiltinID == ARM::BI__builtin_arm_wsr64 ||
  4996. BuiltinID == ARM::BI__builtin_arm_wsrp) {
  4997. bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
  4998. BuiltinID == ARM::BI__builtin_arm_rsr64 ||
  4999. BuiltinID == ARM::BI__builtin_arm_rsrp;
  5000. bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
  5001. BuiltinID == ARM::BI__builtin_arm_wsrp;
  5002. bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
  5003. BuiltinID == ARM::BI__builtin_arm_wsr64;
  5004. llvm::Type *ValueType;
  5005. llvm::Type *RegisterType;
  5006. if (IsPointerBuiltin) {
  5007. ValueType = VoidPtrTy;
  5008. RegisterType = Int32Ty;
  5009. } else if (Is64Bit) {
  5010. ValueType = RegisterType = Int64Ty;
  5011. } else {
  5012. ValueType = RegisterType = Int32Ty;
  5013. }
  5014. return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
  5015. }
  5016. // Find out if any arguments are required to be integer constant
  5017. // expressions.
  5018. unsigned ICEArguments = 0;
  5019. ASTContext::GetBuiltinTypeError Error;
  5020. getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  5021. assert(Error == ASTContext::GE_None && "Should not codegen an error");
  5022. auto getAlignmentValue32 = [&](Address addr) -> Value* {
  5023. return Builder.getInt32(addr.getAlignment().getQuantity());
  5024. };
  5025. Address PtrOp0 = Address::invalid();
  5026. Address PtrOp1 = Address::invalid();
  5027. SmallVector<Value*, 4> Ops;
  5028. bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
  5029. unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
  5030. for (unsigned i = 0, e = NumArgs; i != e; i++) {
  5031. if (i == 0) {
  5032. switch (BuiltinID) {
  5033. case NEON::BI__builtin_neon_vld1_v:
  5034. case NEON::BI__builtin_neon_vld1q_v:
  5035. case NEON::BI__builtin_neon_vld1q_lane_v:
  5036. case NEON::BI__builtin_neon_vld1_lane_v:
  5037. case NEON::BI__builtin_neon_vld1_dup_v:
  5038. case NEON::BI__builtin_neon_vld1q_dup_v:
  5039. case NEON::BI__builtin_neon_vst1_v:
  5040. case NEON::BI__builtin_neon_vst1q_v:
  5041. case NEON::BI__builtin_neon_vst1q_lane_v:
  5042. case NEON::BI__builtin_neon_vst1_lane_v:
  5043. case NEON::BI__builtin_neon_vst2_v:
  5044. case NEON::BI__builtin_neon_vst2q_v:
  5045. case NEON::BI__builtin_neon_vst2_lane_v:
  5046. case NEON::BI__builtin_neon_vst2q_lane_v:
  5047. case NEON::BI__builtin_neon_vst3_v:
  5048. case NEON::BI__builtin_neon_vst3q_v:
  5049. case NEON::BI__builtin_neon_vst3_lane_v:
  5050. case NEON::BI__builtin_neon_vst3q_lane_v:
  5051. case NEON::BI__builtin_neon_vst4_v:
  5052. case NEON::BI__builtin_neon_vst4q_v:
  5053. case NEON::BI__builtin_neon_vst4_lane_v:
  5054. case NEON::BI__builtin_neon_vst4q_lane_v:
  5055. // Get the alignment for the argument in addition to the value;
  5056. // we'll use it later.
  5057. PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
  5058. Ops.push_back(PtrOp0.getPointer());
  5059. continue;
  5060. }
  5061. }
  5062. if (i == 1) {
  5063. switch (BuiltinID) {
  5064. case NEON::BI__builtin_neon_vld2_v:
  5065. case NEON::BI__builtin_neon_vld2q_v:
  5066. case NEON::BI__builtin_neon_vld3_v:
  5067. case NEON::BI__builtin_neon_vld3q_v:
  5068. case NEON::BI__builtin_neon_vld4_v:
  5069. case NEON::BI__builtin_neon_vld4q_v:
  5070. case NEON::BI__builtin_neon_vld2_lane_v:
  5071. case NEON::BI__builtin_neon_vld2q_lane_v:
  5072. case NEON::BI__builtin_neon_vld3_lane_v:
  5073. case NEON::BI__builtin_neon_vld3q_lane_v:
  5074. case NEON::BI__builtin_neon_vld4_lane_v:
  5075. case NEON::BI__builtin_neon_vld4q_lane_v:
  5076. case NEON::BI__builtin_neon_vld2_dup_v:
  5077. case NEON::BI__builtin_neon_vld3_dup_v:
  5078. case NEON::BI__builtin_neon_vld4_dup_v:
  5079. // Get the alignment for the argument in addition to the value;
  5080. // we'll use it later.
  5081. PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
  5082. Ops.push_back(PtrOp1.getPointer());
  5083. continue;
  5084. }
  5085. }
  5086. if ((ICEArguments & (1 << i)) == 0) {
  5087. Ops.push_back(EmitScalarExpr(E->getArg(i)));
  5088. } else {
  5089. // If this is required to be a constant, constant fold it so that we know
  5090. // that the generated intrinsic gets a ConstantInt.
  5091. llvm::APSInt Result;
  5092. bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
  5093. assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
  5094. Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  5095. }
  5096. }
  5097. switch (BuiltinID) {
  5098. default: break;
  5099. case NEON::BI__builtin_neon_vget_lane_i8:
  5100. case NEON::BI__builtin_neon_vget_lane_i16:
  5101. case NEON::BI__builtin_neon_vget_lane_i32:
  5102. case NEON::BI__builtin_neon_vget_lane_i64:
  5103. case NEON::BI__builtin_neon_vget_lane_f32:
  5104. case NEON::BI__builtin_neon_vgetq_lane_i8:
  5105. case NEON::BI__builtin_neon_vgetq_lane_i16:
  5106. case NEON::BI__builtin_neon_vgetq_lane_i32:
  5107. case NEON::BI__builtin_neon_vgetq_lane_i64:
  5108. case NEON::BI__builtin_neon_vgetq_lane_f32:
  5109. return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
  5110. case NEON::BI__builtin_neon_vrndns_f32: {
  5111. Value *Arg = EmitScalarExpr(E->getArg(0));
  5112. llvm::Type *Tys[] = {Arg->getType()};
  5113. Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
  5114. return Builder.CreateCall(F, {Arg}, "vrndn"); }
  5115. case NEON::BI__builtin_neon_vset_lane_i8:
  5116. case NEON::BI__builtin_neon_vset_lane_i16:
  5117. case NEON::BI__builtin_neon_vset_lane_i32:
  5118. case NEON::BI__builtin_neon_vset_lane_i64:
  5119. case NEON::BI__builtin_neon_vset_lane_f32:
  5120. case NEON::BI__builtin_neon_vsetq_lane_i8:
  5121. case NEON::BI__builtin_neon_vsetq_lane_i16:
  5122. case NEON::BI__builtin_neon_vsetq_lane_i32:
  5123. case NEON::BI__builtin_neon_vsetq_lane_i64:
  5124. case NEON::BI__builtin_neon_vsetq_lane_f32:
  5125. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  5126. case NEON::BI__builtin_neon_vsha1h_u32:
  5127. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
  5128. "vsha1h");
  5129. case NEON::BI__builtin_neon_vsha1cq_u32:
  5130. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
  5131. "vsha1h");
  5132. case NEON::BI__builtin_neon_vsha1pq_u32:
  5133. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
  5134. "vsha1h");
  5135. case NEON::BI__builtin_neon_vsha1mq_u32:
  5136. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
  5137. "vsha1h");
  5138. // The ARM _MoveToCoprocessor builtins put the input register value as
  5139. // the first argument, but the LLVM intrinsic expects it as the third one.
  5140. case ARM::BI_MoveToCoprocessor:
  5141. case ARM::BI_MoveToCoprocessor2: {
  5142. Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
  5143. Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
  5144. return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
  5145. Ops[3], Ops[4], Ops[5]});
  5146. }
  5147. case ARM::BI_BitScanForward:
  5148. case ARM::BI_BitScanForward64:
  5149. return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
  5150. case ARM::BI_BitScanReverse:
  5151. case ARM::BI_BitScanReverse64:
  5152. return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
  5153. case ARM::BI_InterlockedAnd64:
  5154. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
  5155. case ARM::BI_InterlockedExchange64:
  5156. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
  5157. case ARM::BI_InterlockedExchangeAdd64:
  5158. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
  5159. case ARM::BI_InterlockedExchangeSub64:
  5160. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
  5161. case ARM::BI_InterlockedOr64:
  5162. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
  5163. case ARM::BI_InterlockedXor64:
  5164. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
  5165. case ARM::BI_InterlockedDecrement64:
  5166. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
  5167. case ARM::BI_InterlockedIncrement64:
  5168. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
  5169. }
  5170. // Get the last argument, which specifies the vector type.
  5171. assert(HasExtraArg);
  5172. llvm::APSInt Result;
  5173. const Expr *Arg = E->getArg(E->getNumArgs()-1);
  5174. if (!Arg->isIntegerConstantExpr(Result, getContext()))
  5175. return nullptr;
  5176. if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
  5177. BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
  5178. // Determine the overloaded type of this builtin.
  5179. llvm::Type *Ty;
  5180. if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
  5181. Ty = FloatTy;
  5182. else
  5183. Ty = DoubleTy;
  5184. // Determine whether this is an unsigned conversion or not.
  5185. bool usgn = Result.getZExtValue() == 1;
  5186. unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
  5187. // Call the appropriate intrinsic.
  5188. Function *F = CGM.getIntrinsic(Int, Ty);
  5189. return Builder.CreateCall(F, Ops, "vcvtr");
  5190. }
  5191. // Determine the type of this overloaded NEON intrinsic.
  5192. NeonTypeFlags Type(Result.getZExtValue());
  5193. bool usgn = Type.isUnsigned();
  5194. bool rightShift = false;
  5195. llvm::VectorType *VTy = GetNeonType(this, Type,
  5196. getTarget().hasLegalHalfType());
  5197. llvm::Type *Ty = VTy;
  5198. if (!Ty)
  5199. return nullptr;
  5200. // Many NEON builtins have identical semantics and uses in ARM and
  5201. // AArch64. Emit these in a single function.
  5202. auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
  5203. const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
  5204. IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  5205. if (Builtin)
  5206. return EmitCommonNeonBuiltinExpr(
  5207. Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
  5208. Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
  5209. unsigned Int;
  5210. switch (BuiltinID) {
  5211. default: return nullptr;
  5212. case NEON::BI__builtin_neon_vld1q_lane_v:
  5213. // Handle 64-bit integer elements as a special case. Use shuffles of
  5214. // one-element vectors to avoid poor code for i64 in the backend.
  5215. if (VTy->getElementType()->isIntegerTy(64)) {
  5216. // Extract the other lane.
  5217. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  5218. uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
  5219. Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
  5220. Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
  5221. // Load the value as a one-element vector.
  5222. Ty = llvm::VectorType::get(VTy->getElementType(), 1);
  5223. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  5224. Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
  5225. Value *Align = getAlignmentValue32(PtrOp0);
  5226. Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
  5227. // Combine them.
  5228. uint32_t Indices[] = {1 - Lane, Lane};
  5229. SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
  5230. return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
  5231. }
  5232. LLVM_FALLTHROUGH;
  5233. case NEON::BI__builtin_neon_vld1_lane_v: {
  5234. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  5235. PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
  5236. Value *Ld = Builder.CreateLoad(PtrOp0);
  5237. return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  5238. }
  5239. case NEON::BI__builtin_neon_vld2_dup_v:
  5240. case NEON::BI__builtin_neon_vld3_dup_v:
  5241. case NEON::BI__builtin_neon_vld4_dup_v: {
  5242. // Handle 64-bit elements as a special-case. There is no "dup" needed.
  5243. if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
  5244. switch (BuiltinID) {
  5245. case NEON::BI__builtin_neon_vld2_dup_v:
  5246. Int = Intrinsic::arm_neon_vld2;
  5247. break;
  5248. case NEON::BI__builtin_neon_vld3_dup_v:
  5249. Int = Intrinsic::arm_neon_vld3;
  5250. break;
  5251. case NEON::BI__builtin_neon_vld4_dup_v:
  5252. Int = Intrinsic::arm_neon_vld4;
  5253. break;
  5254. default: llvm_unreachable("unknown vld_dup intrinsic?");
  5255. }
  5256. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  5257. Function *F = CGM.getIntrinsic(Int, Tys);
  5258. llvm::Value *Align = getAlignmentValue32(PtrOp1);
  5259. Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
  5260. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  5261. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  5262. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  5263. }
  5264. switch (BuiltinID) {
  5265. case NEON::BI__builtin_neon_vld2_dup_v:
  5266. Int = Intrinsic::arm_neon_vld2lane;
  5267. break;
  5268. case NEON::BI__builtin_neon_vld3_dup_v:
  5269. Int = Intrinsic::arm_neon_vld3lane;
  5270. break;
  5271. case NEON::BI__builtin_neon_vld4_dup_v:
  5272. Int = Intrinsic::arm_neon_vld4lane;
  5273. break;
  5274. default: llvm_unreachable("unknown vld_dup intrinsic?");
  5275. }
  5276. llvm::Type *Tys[] = {Ty, Int8PtrTy};
  5277. Function *F = CGM.getIntrinsic(Int, Tys);
  5278. llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
  5279. SmallVector<Value*, 6> Args;
  5280. Args.push_back(Ops[1]);
  5281. Args.append(STy->getNumElements(), UndefValue::get(Ty));
  5282. llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
  5283. Args.push_back(CI);
  5284. Args.push_back(getAlignmentValue32(PtrOp1));
  5285. Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
  5286. // splat lane 0 to all elts in each vector of the result.
  5287. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
  5288. Value *Val = Builder.CreateExtractValue(Ops[1], i);
  5289. Value *Elt = Builder.CreateBitCast(Val, Ty);
  5290. Elt = EmitNeonSplat(Elt, CI);
  5291. Elt = Builder.CreateBitCast(Elt, Val->getType());
  5292. Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
  5293. }
  5294. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  5295. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  5296. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  5297. }
  5298. case NEON::BI__builtin_neon_vqrshrn_n_v:
  5299. Int =
  5300. usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
  5301. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
  5302. 1, true);
  5303. case NEON::BI__builtin_neon_vqrshrun_n_v:
  5304. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
  5305. Ops, "vqrshrun_n", 1, true);
  5306. case NEON::BI__builtin_neon_vqshrn_n_v:
  5307. Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
  5308. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
  5309. 1, true);
  5310. case NEON::BI__builtin_neon_vqshrun_n_v:
  5311. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
  5312. Ops, "vqshrun_n", 1, true);
  5313. case NEON::BI__builtin_neon_vrecpe_v:
  5314. case NEON::BI__builtin_neon_vrecpeq_v:
  5315. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
  5316. Ops, "vrecpe");
  5317. case NEON::BI__builtin_neon_vrshrn_n_v:
  5318. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
  5319. Ops, "vrshrn_n", 1, true);
  5320. case NEON::BI__builtin_neon_vrsra_n_v:
  5321. case NEON::BI__builtin_neon_vrsraq_n_v:
  5322. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  5323. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  5324. Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
  5325. Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
  5326. Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
  5327. return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  5328. case NEON::BI__builtin_neon_vsri_n_v:
  5329. case NEON::BI__builtin_neon_vsriq_n_v:
  5330. rightShift = true;
  5331. LLVM_FALLTHROUGH;
  5332. case NEON::BI__builtin_neon_vsli_n_v:
  5333. case NEON::BI__builtin_neon_vsliq_n_v:
  5334. Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
  5335. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
  5336. Ops, "vsli_n");
  5337. case NEON::BI__builtin_neon_vsra_n_v:
  5338. case NEON::BI__builtin_neon_vsraq_n_v:
  5339. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  5340. Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
  5341. return Builder.CreateAdd(Ops[0], Ops[1]);
  5342. case NEON::BI__builtin_neon_vst1q_lane_v:
  5343. // Handle 64-bit integer elements as a special case. Use a shuffle to get
  5344. // a one-element vector and avoid poor code for i64 in the backend.
  5345. if (VTy->getElementType()->isIntegerTy(64)) {
  5346. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  5347. Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
  5348. Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
  5349. Ops[2] = getAlignmentValue32(PtrOp0);
  5350. llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
  5351. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
  5352. Tys), Ops);
  5353. }
  5354. LLVM_FALLTHROUGH;
  5355. case NEON::BI__builtin_neon_vst1_lane_v: {
  5356. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  5357. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
  5358. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  5359. auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
  5360. return St;
  5361. }
  5362. case NEON::BI__builtin_neon_vtbl1_v:
  5363. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
  5364. Ops, "vtbl1");
  5365. case NEON::BI__builtin_neon_vtbl2_v:
  5366. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
  5367. Ops, "vtbl2");
  5368. case NEON::BI__builtin_neon_vtbl3_v:
  5369. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
  5370. Ops, "vtbl3");
  5371. case NEON::BI__builtin_neon_vtbl4_v:
  5372. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
  5373. Ops, "vtbl4");
  5374. case NEON::BI__builtin_neon_vtbx1_v:
  5375. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
  5376. Ops, "vtbx1");
  5377. case NEON::BI__builtin_neon_vtbx2_v:
  5378. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
  5379. Ops, "vtbx2");
  5380. case NEON::BI__builtin_neon_vtbx3_v:
  5381. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
  5382. Ops, "vtbx3");
  5383. case NEON::BI__builtin_neon_vtbx4_v:
  5384. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
  5385. Ops, "vtbx4");
  5386. }
  5387. }
  5388. static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
  5389. const CallExpr *E,
  5390. SmallVectorImpl<Value *> &Ops,
  5391. llvm::Triple::ArchType Arch) {
  5392. unsigned int Int = 0;
  5393. const char *s = nullptr;
  5394. switch (BuiltinID) {
  5395. default:
  5396. return nullptr;
  5397. case NEON::BI__builtin_neon_vtbl1_v:
  5398. case NEON::BI__builtin_neon_vqtbl1_v:
  5399. case NEON::BI__builtin_neon_vqtbl1q_v:
  5400. case NEON::BI__builtin_neon_vtbl2_v:
  5401. case NEON::BI__builtin_neon_vqtbl2_v:
  5402. case NEON::BI__builtin_neon_vqtbl2q_v:
  5403. case NEON::BI__builtin_neon_vtbl3_v:
  5404. case NEON::BI__builtin_neon_vqtbl3_v:
  5405. case NEON::BI__builtin_neon_vqtbl3q_v:
  5406. case NEON::BI__builtin_neon_vtbl4_v:
  5407. case NEON::BI__builtin_neon_vqtbl4_v:
  5408. case NEON::BI__builtin_neon_vqtbl4q_v:
  5409. break;
  5410. case NEON::BI__builtin_neon_vtbx1_v:
  5411. case NEON::BI__builtin_neon_vqtbx1_v:
  5412. case NEON::BI__builtin_neon_vqtbx1q_v:
  5413. case NEON::BI__builtin_neon_vtbx2_v:
  5414. case NEON::BI__builtin_neon_vqtbx2_v:
  5415. case NEON::BI__builtin_neon_vqtbx2q_v:
  5416. case NEON::BI__builtin_neon_vtbx3_v:
  5417. case NEON::BI__builtin_neon_vqtbx3_v:
  5418. case NEON::BI__builtin_neon_vqtbx3q_v:
  5419. case NEON::BI__builtin_neon_vtbx4_v:
  5420. case NEON::BI__builtin_neon_vqtbx4_v:
  5421. case NEON::BI__builtin_neon_vqtbx4q_v:
  5422. break;
  5423. }
  5424. assert(E->getNumArgs() >= 3);
  5425. // Get the last argument, which specifies the vector type.
  5426. llvm::APSInt Result;
  5427. const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  5428. if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
  5429. return nullptr;
  5430. // Determine the type of this overloaded NEON intrinsic.
  5431. NeonTypeFlags Type(Result.getZExtValue());
  5432. llvm::VectorType *Ty = GetNeonType(&CGF, Type);
  5433. if (!Ty)
  5434. return nullptr;
  5435. CodeGen::CGBuilderTy &Builder = CGF.Builder;
  5436. // AArch64 scalar builtins are not overloaded, they do not have an extra
  5437. // argument that specifies the vector type, need to handle each case.
  5438. switch (BuiltinID) {
  5439. case NEON::BI__builtin_neon_vtbl1_v: {
  5440. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
  5441. Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
  5442. "vtbl1");
  5443. }
  5444. case NEON::BI__builtin_neon_vtbl2_v: {
  5445. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
  5446. Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
  5447. "vtbl1");
  5448. }
  5449. case NEON::BI__builtin_neon_vtbl3_v: {
  5450. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
  5451. Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
  5452. "vtbl2");
  5453. }
  5454. case NEON::BI__builtin_neon_vtbl4_v: {
  5455. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
  5456. Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
  5457. "vtbl2");
  5458. }
  5459. case NEON::BI__builtin_neon_vtbx1_v: {
  5460. Value *TblRes =
  5461. packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
  5462. Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
  5463. llvm::Constant *EightV = ConstantInt::get(Ty, 8);
  5464. Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
  5465. CmpRes = Builder.CreateSExt(CmpRes, Ty);
  5466. Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
  5467. Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
  5468. return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  5469. }
  5470. case NEON::BI__builtin_neon_vtbx2_v: {
  5471. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
  5472. Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
  5473. "vtbx1");
  5474. }
  5475. case NEON::BI__builtin_neon_vtbx3_v: {
  5476. Value *TblRes =
  5477. packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
  5478. Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
  5479. llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
  5480. Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
  5481. TwentyFourV);
  5482. CmpRes = Builder.CreateSExt(CmpRes, Ty);
  5483. Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
  5484. Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
  5485. return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  5486. }
  5487. case NEON::BI__builtin_neon_vtbx4_v: {
  5488. return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
  5489. Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
  5490. "vtbx2");
  5491. }
  5492. case NEON::BI__builtin_neon_vqtbl1_v:
  5493. case NEON::BI__builtin_neon_vqtbl1q_v:
  5494. Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
  5495. case NEON::BI__builtin_neon_vqtbl2_v:
  5496. case NEON::BI__builtin_neon_vqtbl2q_v: {
  5497. Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
  5498. case NEON::BI__builtin_neon_vqtbl3_v:
  5499. case NEON::BI__builtin_neon_vqtbl3q_v:
  5500. Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
  5501. case NEON::BI__builtin_neon_vqtbl4_v:
  5502. case NEON::BI__builtin_neon_vqtbl4q_v:
  5503. Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
  5504. case NEON::BI__builtin_neon_vqtbx1_v:
  5505. case NEON::BI__builtin_neon_vqtbx1q_v:
  5506. Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
  5507. case NEON::BI__builtin_neon_vqtbx2_v:
  5508. case NEON::BI__builtin_neon_vqtbx2q_v:
  5509. Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
  5510. case NEON::BI__builtin_neon_vqtbx3_v:
  5511. case NEON::BI__builtin_neon_vqtbx3q_v:
  5512. Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
  5513. case NEON::BI__builtin_neon_vqtbx4_v:
  5514. case NEON::BI__builtin_neon_vqtbx4q_v:
  5515. Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
  5516. }
  5517. }
  5518. if (!Int)
  5519. return nullptr;
  5520. Function *F = CGF.CGM.getIntrinsic(Int, Ty);
  5521. return CGF.EmitNeonCall(F, Ops, s);
  5522. }
  5523. Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
  5524. llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
  5525. Op = Builder.CreateBitCast(Op, Int16Ty);
  5526. Value *V = UndefValue::get(VTy);
  5527. llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  5528. Op = Builder.CreateInsertElement(V, Op, CI);
  5529. return Op;
  5530. }
  5531. Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
  5532. const CallExpr *E,
  5533. llvm::Triple::ArchType Arch) {
  5534. unsigned HintID = static_cast<unsigned>(-1);
  5535. switch (BuiltinID) {
  5536. default: break;
  5537. case AArch64::BI__builtin_arm_nop:
  5538. HintID = 0;
  5539. break;
  5540. case AArch64::BI__builtin_arm_yield:
  5541. HintID = 1;
  5542. break;
  5543. case AArch64::BI__builtin_arm_wfe:
  5544. HintID = 2;
  5545. break;
  5546. case AArch64::BI__builtin_arm_wfi:
  5547. HintID = 3;
  5548. break;
  5549. case AArch64::BI__builtin_arm_sev:
  5550. HintID = 4;
  5551. break;
  5552. case AArch64::BI__builtin_arm_sevl:
  5553. HintID = 5;
  5554. break;
  5555. }
  5556. if (HintID != static_cast<unsigned>(-1)) {
  5557. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
  5558. return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  5559. }
  5560. if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
  5561. Value *Address = EmitScalarExpr(E->getArg(0));
  5562. Value *RW = EmitScalarExpr(E->getArg(1));
  5563. Value *CacheLevel = EmitScalarExpr(E->getArg(2));
  5564. Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
  5565. Value *IsData = EmitScalarExpr(E->getArg(4));
  5566. Value *Locality = nullptr;
  5567. if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
  5568. // Temporal fetch, needs to convert cache level to locality.
  5569. Locality = llvm::ConstantInt::get(Int32Ty,
  5570. -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
  5571. } else {
  5572. // Streaming fetch.
  5573. Locality = llvm::ConstantInt::get(Int32Ty, 0);
  5574. }
  5575. // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
  5576. // PLDL3STRM or PLDL2STRM.
  5577. Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
  5578. return Builder.CreateCall(F, {Address, RW, Locality, IsData});
  5579. }
  5580. if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
  5581. assert((getContext().getTypeSize(E->getType()) == 32) &&
  5582. "rbit of unusual size!");
  5583. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  5584. return Builder.CreateCall(
  5585. CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  5586. }
  5587. if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
  5588. assert((getContext().getTypeSize(E->getType()) == 64) &&
  5589. "rbit of unusual size!");
  5590. llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  5591. return Builder.CreateCall(
  5592. CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
  5593. }
  5594. if (BuiltinID == AArch64::BI__clear_cache) {
  5595. assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
  5596. const FunctionDecl *FD = E->getDirectCallee();
  5597. Value *Ops[2];
  5598. for (unsigned i = 0; i < 2; i++)
  5599. Ops[i] = EmitScalarExpr(E->getArg(i));
  5600. llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
  5601. llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
  5602. StringRef Name = FD->getName();
  5603. return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  5604. }
  5605. if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
  5606. BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
  5607. getContext().getTypeSize(E->getType()) == 128) {
  5608. Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
  5609. ? Intrinsic::aarch64_ldaxp
  5610. : Intrinsic::aarch64_ldxp);
  5611. Value *LdPtr = EmitScalarExpr(E->getArg(0));
  5612. Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
  5613. "ldxp");
  5614. Value *Val0 = Builder.CreateExtractValue(Val, 1);
  5615. Value *Val1 = Builder.CreateExtractValue(Val, 0);
  5616. llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
  5617. Val0 = Builder.CreateZExt(Val0, Int128Ty);
  5618. Val1 = Builder.CreateZExt(Val1, Int128Ty);
  5619. Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
  5620. Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
  5621. Val = Builder.CreateOr(Val, Val1);
  5622. return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  5623. } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
  5624. BuiltinID == AArch64::BI__builtin_arm_ldaex) {
  5625. Value *LoadAddr = EmitScalarExpr(E->getArg(0));
  5626. QualType Ty = E->getType();
  5627. llvm::Type *RealResTy = ConvertType(Ty);
  5628. llvm::Type *PtrTy = llvm::IntegerType::get(
  5629. getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
  5630. LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
  5631. Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
  5632. ? Intrinsic::aarch64_ldaxr
  5633. : Intrinsic::aarch64_ldxr,
  5634. PtrTy);
  5635. Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
  5636. if (RealResTy->isPointerTy())
  5637. return Builder.CreateIntToPtr(Val, RealResTy);
  5638. llvm::Type *IntResTy = llvm::IntegerType::get(
  5639. getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
  5640. Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
  5641. return Builder.CreateBitCast(Val, RealResTy);
  5642. }
  5643. if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
  5644. BuiltinID == AArch64::BI__builtin_arm_stlex) &&
  5645. getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
  5646. Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
  5647. ? Intrinsic::aarch64_stlxp
  5648. : Intrinsic::aarch64_stxp);
  5649. llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
  5650. Address Tmp = CreateMemTemp(E->getArg(0)->getType());
  5651. EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
  5652. Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
  5653. llvm::Value *Val = Builder.CreateLoad(Tmp);
  5654. Value *Arg0 = Builder.CreateExtractValue(Val, 0);
  5655. Value *Arg1 = Builder.CreateExtractValue(Val, 1);
  5656. Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
  5657. Int8PtrTy);
  5658. return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
  5659. }
  5660. if (BuiltinID == AArch64::BI__builtin_arm_strex ||
  5661. BuiltinID == AArch64::BI__builtin_arm_stlex) {
  5662. Value *StoreVal = EmitScalarExpr(E->getArg(0));
  5663. Value *StoreAddr = EmitScalarExpr(E->getArg(1));
  5664. QualType Ty = E->getArg(0)->getType();
  5665. llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
  5666. getContext().getTypeSize(Ty));
  5667. StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
  5668. if (StoreVal->getType()->isPointerTy())
  5669. StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
  5670. else {
  5671. llvm::Type *IntTy = llvm::IntegerType::get(
  5672. getLLVMContext(),
  5673. CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
  5674. StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
  5675. StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
  5676. }
  5677. Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
  5678. ? Intrinsic::aarch64_stlxr
  5679. : Intrinsic::aarch64_stxr,
  5680. StoreAddr->getType());
  5681. return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
  5682. }
  5683. if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
  5684. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
  5685. return Builder.CreateCall(F);
  5686. }
  5687. // CRC32
  5688. Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  5689. switch (BuiltinID) {
  5690. case AArch64::BI__builtin_arm_crc32b:
  5691. CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
  5692. case AArch64::BI__builtin_arm_crc32cb:
  5693. CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
  5694. case AArch64::BI__builtin_arm_crc32h:
  5695. CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
  5696. case AArch64::BI__builtin_arm_crc32ch:
  5697. CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
  5698. case AArch64::BI__builtin_arm_crc32w:
  5699. CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
  5700. case AArch64::BI__builtin_arm_crc32cw:
  5701. CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
  5702. case AArch64::BI__builtin_arm_crc32d:
  5703. CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
  5704. case AArch64::BI__builtin_arm_crc32cd:
  5705. CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
  5706. }
  5707. if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
  5708. Value *Arg0 = EmitScalarExpr(E->getArg(0));
  5709. Value *Arg1 = EmitScalarExpr(E->getArg(1));
  5710. Function *F = CGM.getIntrinsic(CRCIntrinsicID);
  5711. llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
  5712. Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
  5713. return Builder.CreateCall(F, {Arg0, Arg1});
  5714. }
  5715. if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
  5716. BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
  5717. BuiltinID == AArch64::BI__builtin_arm_rsrp ||
  5718. BuiltinID == AArch64::BI__builtin_arm_wsr ||
  5719. BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
  5720. BuiltinID == AArch64::BI__builtin_arm_wsrp) {
  5721. bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
  5722. BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
  5723. BuiltinID == AArch64::BI__builtin_arm_rsrp;
  5724. bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
  5725. BuiltinID == AArch64::BI__builtin_arm_wsrp;
  5726. bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
  5727. BuiltinID != AArch64::BI__builtin_arm_wsr;
  5728. llvm::Type *ValueType;
  5729. llvm::Type *RegisterType = Int64Ty;
  5730. if (IsPointerBuiltin) {
  5731. ValueType = VoidPtrTy;
  5732. } else if (Is64Bit) {
  5733. ValueType = Int64Ty;
  5734. } else {
  5735. ValueType = Int32Ty;
  5736. }
  5737. return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
  5738. }
  5739. // Find out if any arguments are required to be integer constant
  5740. // expressions.
  5741. unsigned ICEArguments = 0;
  5742. ASTContext::GetBuiltinTypeError Error;
  5743. getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  5744. assert(Error == ASTContext::GE_None && "Should not codegen an error");
  5745. llvm::SmallVector<Value*, 4> Ops;
  5746. for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
  5747. if ((ICEArguments & (1 << i)) == 0) {
  5748. Ops.push_back(EmitScalarExpr(E->getArg(i)));
  5749. } else {
  5750. // If this is required to be a constant, constant fold it so that we know
  5751. // that the generated intrinsic gets a ConstantInt.
  5752. llvm::APSInt Result;
  5753. bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
  5754. assert(IsConst && "Constant arg isn't actually constant?");
  5755. (void)IsConst;
  5756. Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  5757. }
  5758. }
  5759. auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
  5760. const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
  5761. SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
  5762. if (Builtin) {
  5763. Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
  5764. Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
  5765. assert(Result && "SISD intrinsic should have been handled");
  5766. return Result;
  5767. }
  5768. llvm::APSInt Result;
  5769. const Expr *Arg = E->getArg(E->getNumArgs()-1);
  5770. NeonTypeFlags Type(0);
  5771. if (Arg->isIntegerConstantExpr(Result, getContext()))
  5772. // Determine the type of this overloaded NEON intrinsic.
  5773. Type = NeonTypeFlags(Result.getZExtValue());
  5774. bool usgn = Type.isUnsigned();
  5775. bool quad = Type.isQuad();
  5776. // Handle non-overloaded intrinsics first.
  5777. switch (BuiltinID) {
  5778. default: break;
  5779. case NEON::BI__builtin_neon_vabsh_f16:
  5780. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  5781. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
  5782. case NEON::BI__builtin_neon_vldrq_p128: {
  5783. llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
  5784. llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
  5785. Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
  5786. return Builder.CreateAlignedLoad(Int128Ty, Ptr,
  5787. CharUnits::fromQuantity(16));
  5788. }
  5789. case NEON::BI__builtin_neon_vstrq_p128: {
  5790. llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
  5791. Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
  5792. return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
  5793. }
  5794. case NEON::BI__builtin_neon_vcvts_u32_f32:
  5795. case NEON::BI__builtin_neon_vcvtd_u64_f64:
  5796. usgn = true;
  5797. LLVM_FALLTHROUGH;
  5798. case NEON::BI__builtin_neon_vcvts_s32_f32:
  5799. case NEON::BI__builtin_neon_vcvtd_s64_f64: {
  5800. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  5801. bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
  5802. llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
  5803. llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
  5804. Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
  5805. if (usgn)
  5806. return Builder.CreateFPToUI(Ops[0], InTy);
  5807. return Builder.CreateFPToSI(Ops[0], InTy);
  5808. }
  5809. case NEON::BI__builtin_neon_vcvts_f32_u32:
  5810. case NEON::BI__builtin_neon_vcvtd_f64_u64:
  5811. usgn = true;
  5812. LLVM_FALLTHROUGH;
  5813. case NEON::BI__builtin_neon_vcvts_f32_s32:
  5814. case NEON::BI__builtin_neon_vcvtd_f64_s64: {
  5815. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  5816. bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
  5817. llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
  5818. llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
  5819. Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
  5820. if (usgn)
  5821. return Builder.CreateUIToFP(Ops[0], FTy);
  5822. return Builder.CreateSIToFP(Ops[0], FTy);
  5823. }
  5824. case NEON::BI__builtin_neon_vcvth_f16_u16:
  5825. case NEON::BI__builtin_neon_vcvth_f16_u32:
  5826. case NEON::BI__builtin_neon_vcvth_f16_u64:
  5827. usgn = true;
  5828. // FALL THROUGH
  5829. case NEON::BI__builtin_neon_vcvth_f16_s16:
  5830. case NEON::BI__builtin_neon_vcvth_f16_s32:
  5831. case NEON::BI__builtin_neon_vcvth_f16_s64: {
  5832. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  5833. llvm::Type *FTy = HalfTy;
  5834. llvm::Type *InTy;
  5835. if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
  5836. InTy = Int64Ty;
  5837. else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
  5838. InTy = Int32Ty;
  5839. else
  5840. InTy = Int16Ty;
  5841. Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
  5842. if (usgn)
  5843. return Builder.CreateUIToFP(Ops[0], FTy);
  5844. return Builder.CreateSIToFP(Ops[0], FTy);
  5845. }
  5846. case NEON::BI__builtin_neon_vcvth_u16_f16:
  5847. usgn = true;
  5848. // FALL THROUGH
  5849. case NEON::BI__builtin_neon_vcvth_s16_f16: {
  5850. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  5851. Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
  5852. if (usgn)
  5853. return Builder.CreateFPToUI(Ops[0], Int16Ty);
  5854. return Builder.CreateFPToSI(Ops[0], Int16Ty);
  5855. }
  5856. case NEON::BI__builtin_neon_vcvth_u32_f16:
  5857. usgn = true;
  5858. // FALL THROUGH
  5859. case NEON::BI__builtin_neon_vcvth_s32_f16: {
  5860. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  5861. Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
  5862. if (usgn)
  5863. return Builder.CreateFPToUI(Ops[0], Int32Ty);
  5864. return Builder.CreateFPToSI(Ops[0], Int32Ty);
  5865. }
  5866. case NEON::BI__builtin_neon_vcvth_u64_f16:
  5867. usgn = true;
  5868. // FALL THROUGH
  5869. case NEON::BI__builtin_neon_vcvth_s64_f16: {
  5870. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  5871. Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
  5872. if (usgn)
  5873. return Builder.CreateFPToUI(Ops[0], Int64Ty);
  5874. return Builder.CreateFPToSI(Ops[0], Int64Ty);
  5875. }
  5876. case NEON::BI__builtin_neon_vcvtah_u16_f16:
  5877. case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  5878. case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  5879. case NEON::BI__builtin_neon_vcvtph_u16_f16:
  5880. case NEON::BI__builtin_neon_vcvtah_s16_f16:
  5881. case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  5882. case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  5883. case NEON::BI__builtin_neon_vcvtph_s16_f16: {
  5884. unsigned Int;
  5885. llvm::Type* InTy = Int32Ty;
  5886. llvm::Type* FTy = HalfTy;
  5887. llvm::Type *Tys[2] = {InTy, FTy};
  5888. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  5889. switch (BuiltinID) {
  5890. default: llvm_unreachable("missing builtin ID in switch!");
  5891. case NEON::BI__builtin_neon_vcvtah_u16_f16:
  5892. Int = Intrinsic::aarch64_neon_fcvtau; break;
  5893. case NEON::BI__builtin_neon_vcvtmh_u16_f16:
  5894. Int = Intrinsic::aarch64_neon_fcvtmu; break;
  5895. case NEON::BI__builtin_neon_vcvtnh_u16_f16:
  5896. Int = Intrinsic::aarch64_neon_fcvtnu; break;
  5897. case NEON::BI__builtin_neon_vcvtph_u16_f16:
  5898. Int = Intrinsic::aarch64_neon_fcvtpu; break;
  5899. case NEON::BI__builtin_neon_vcvtah_s16_f16:
  5900. Int = Intrinsic::aarch64_neon_fcvtas; break;
  5901. case NEON::BI__builtin_neon_vcvtmh_s16_f16:
  5902. Int = Intrinsic::aarch64_neon_fcvtms; break;
  5903. case NEON::BI__builtin_neon_vcvtnh_s16_f16:
  5904. Int = Intrinsic::aarch64_neon_fcvtns; break;
  5905. case NEON::BI__builtin_neon_vcvtph_s16_f16:
  5906. Int = Intrinsic::aarch64_neon_fcvtps; break;
  5907. }
  5908. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
  5909. return Builder.CreateTrunc(Ops[0], Int16Ty);
  5910. }
  5911. case NEON::BI__builtin_neon_vcaleh_f16:
  5912. case NEON::BI__builtin_neon_vcalth_f16:
  5913. case NEON::BI__builtin_neon_vcageh_f16:
  5914. case NEON::BI__builtin_neon_vcagth_f16: {
  5915. unsigned Int;
  5916. llvm::Type* InTy = Int32Ty;
  5917. llvm::Type* FTy = HalfTy;
  5918. llvm::Type *Tys[2] = {InTy, FTy};
  5919. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  5920. switch (BuiltinID) {
  5921. default: llvm_unreachable("missing builtin ID in switch!");
  5922. case NEON::BI__builtin_neon_vcageh_f16:
  5923. Int = Intrinsic::aarch64_neon_facge; break;
  5924. case NEON::BI__builtin_neon_vcagth_f16:
  5925. Int = Intrinsic::aarch64_neon_facgt; break;
  5926. case NEON::BI__builtin_neon_vcaleh_f16:
  5927. Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
  5928. case NEON::BI__builtin_neon_vcalth_f16:
  5929. Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
  5930. }
  5931. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
  5932. return Builder.CreateTrunc(Ops[0], Int16Ty);
  5933. }
  5934. case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  5935. case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
  5936. unsigned Int;
  5937. llvm::Type* InTy = Int32Ty;
  5938. llvm::Type* FTy = HalfTy;
  5939. llvm::Type *Tys[2] = {InTy, FTy};
  5940. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  5941. switch (BuiltinID) {
  5942. default: llvm_unreachable("missing builtin ID in switch!");
  5943. case NEON::BI__builtin_neon_vcvth_n_s16_f16:
  5944. Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
  5945. case NEON::BI__builtin_neon_vcvth_n_u16_f16:
  5946. Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
  5947. }
  5948. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  5949. return Builder.CreateTrunc(Ops[0], Int16Ty);
  5950. }
  5951. case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  5952. case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
  5953. unsigned Int;
  5954. llvm::Type* FTy = HalfTy;
  5955. llvm::Type* InTy = Int32Ty;
  5956. llvm::Type *Tys[2] = {FTy, InTy};
  5957. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  5958. switch (BuiltinID) {
  5959. default: llvm_unreachable("missing builtin ID in switch!");
  5960. case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  5961. Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
  5962. Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
  5963. break;
  5964. case NEON::BI__builtin_neon_vcvth_n_f16_u16:
  5965. Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
  5966. Ops[0] = Builder.CreateZExt(Ops[0], InTy);
  5967. break;
  5968. }
  5969. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  5970. }
  5971. case NEON::BI__builtin_neon_vpaddd_s64: {
  5972. llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
  5973. Value *Vec = EmitScalarExpr(E->getArg(0));
  5974. // The vector is v2f64, so make sure it's bitcast to that.
  5975. Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
  5976. llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
  5977. llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
  5978. Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
  5979. Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
  5980. // Pairwise addition of a v2f64 into a scalar f64.
  5981. return Builder.CreateAdd(Op0, Op1, "vpaddd");
  5982. }
  5983. case NEON::BI__builtin_neon_vpaddd_f64: {
  5984. llvm::Type *Ty =
  5985. llvm::VectorType::get(DoubleTy, 2);
  5986. Value *Vec = EmitScalarExpr(E->getArg(0));
  5987. // The vector is v2f64, so make sure it's bitcast to that.
  5988. Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
  5989. llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
  5990. llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
  5991. Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
  5992. Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
  5993. // Pairwise addition of a v2f64 into a scalar f64.
  5994. return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  5995. }
  5996. case NEON::BI__builtin_neon_vpadds_f32: {
  5997. llvm::Type *Ty =
  5998. llvm::VectorType::get(FloatTy, 2);
  5999. Value *Vec = EmitScalarExpr(E->getArg(0));
  6000. // The vector is v2f32, so make sure it's bitcast to that.
  6001. Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
  6002. llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
  6003. llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
  6004. Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
  6005. Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
  6006. // Pairwise addition of a v2f32 into a scalar f32.
  6007. return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  6008. }
  6009. case NEON::BI__builtin_neon_vceqzd_s64:
  6010. case NEON::BI__builtin_neon_vceqzd_f64:
  6011. case NEON::BI__builtin_neon_vceqzs_f32:
  6012. case NEON::BI__builtin_neon_vceqzh_f16:
  6013. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6014. return EmitAArch64CompareBuiltinExpr(
  6015. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  6016. ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
  6017. case NEON::BI__builtin_neon_vcgezd_s64:
  6018. case NEON::BI__builtin_neon_vcgezd_f64:
  6019. case NEON::BI__builtin_neon_vcgezs_f32:
  6020. case NEON::BI__builtin_neon_vcgezh_f16:
  6021. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6022. return EmitAArch64CompareBuiltinExpr(
  6023. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  6024. ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
  6025. case NEON::BI__builtin_neon_vclezd_s64:
  6026. case NEON::BI__builtin_neon_vclezd_f64:
  6027. case NEON::BI__builtin_neon_vclezs_f32:
  6028. case NEON::BI__builtin_neon_vclezh_f16:
  6029. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6030. return EmitAArch64CompareBuiltinExpr(
  6031. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  6032. ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
  6033. case NEON::BI__builtin_neon_vcgtzd_s64:
  6034. case NEON::BI__builtin_neon_vcgtzd_f64:
  6035. case NEON::BI__builtin_neon_vcgtzs_f32:
  6036. case NEON::BI__builtin_neon_vcgtzh_f16:
  6037. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6038. return EmitAArch64CompareBuiltinExpr(
  6039. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  6040. ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
  6041. case NEON::BI__builtin_neon_vcltzd_s64:
  6042. case NEON::BI__builtin_neon_vcltzd_f64:
  6043. case NEON::BI__builtin_neon_vcltzs_f32:
  6044. case NEON::BI__builtin_neon_vcltzh_f16:
  6045. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6046. return EmitAArch64CompareBuiltinExpr(
  6047. Ops[0], ConvertType(E->getCallReturnType(getContext())),
  6048. ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
  6049. case NEON::BI__builtin_neon_vceqzd_u64: {
  6050. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6051. Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
  6052. Ops[0] =
  6053. Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
  6054. return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
  6055. }
  6056. case NEON::BI__builtin_neon_vceqd_f64:
  6057. case NEON::BI__builtin_neon_vcled_f64:
  6058. case NEON::BI__builtin_neon_vcltd_f64:
  6059. case NEON::BI__builtin_neon_vcged_f64:
  6060. case NEON::BI__builtin_neon_vcgtd_f64: {
  6061. llvm::CmpInst::Predicate P;
  6062. switch (BuiltinID) {
  6063. default: llvm_unreachable("missing builtin ID in switch!");
  6064. case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
  6065. case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
  6066. case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
  6067. case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
  6068. case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
  6069. }
  6070. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6071. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  6072. Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
  6073. Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
  6074. return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
  6075. }
  6076. case NEON::BI__builtin_neon_vceqs_f32:
  6077. case NEON::BI__builtin_neon_vcles_f32:
  6078. case NEON::BI__builtin_neon_vclts_f32:
  6079. case NEON::BI__builtin_neon_vcges_f32:
  6080. case NEON::BI__builtin_neon_vcgts_f32: {
  6081. llvm::CmpInst::Predicate P;
  6082. switch (BuiltinID) {
  6083. default: llvm_unreachable("missing builtin ID in switch!");
  6084. case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
  6085. case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
  6086. case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
  6087. case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
  6088. case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
  6089. }
  6090. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6091. Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
  6092. Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
  6093. Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
  6094. return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
  6095. }
  6096. case NEON::BI__builtin_neon_vceqh_f16:
  6097. case NEON::BI__builtin_neon_vcleh_f16:
  6098. case NEON::BI__builtin_neon_vclth_f16:
  6099. case NEON::BI__builtin_neon_vcgeh_f16:
  6100. case NEON::BI__builtin_neon_vcgth_f16: {
  6101. llvm::CmpInst::Predicate P;
  6102. switch (BuiltinID) {
  6103. default: llvm_unreachable("missing builtin ID in switch!");
  6104. case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
  6105. case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
  6106. case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
  6107. case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
  6108. case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
  6109. }
  6110. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6111. Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
  6112. Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
  6113. Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
  6114. return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
  6115. }
  6116. case NEON::BI__builtin_neon_vceqd_s64:
  6117. case NEON::BI__builtin_neon_vceqd_u64:
  6118. case NEON::BI__builtin_neon_vcgtd_s64:
  6119. case NEON::BI__builtin_neon_vcgtd_u64:
  6120. case NEON::BI__builtin_neon_vcltd_s64:
  6121. case NEON::BI__builtin_neon_vcltd_u64:
  6122. case NEON::BI__builtin_neon_vcged_u64:
  6123. case NEON::BI__builtin_neon_vcged_s64:
  6124. case NEON::BI__builtin_neon_vcled_u64:
  6125. case NEON::BI__builtin_neon_vcled_s64: {
  6126. llvm::CmpInst::Predicate P;
  6127. switch (BuiltinID) {
  6128. default: llvm_unreachable("missing builtin ID in switch!");
  6129. case NEON::BI__builtin_neon_vceqd_s64:
6130. case NEON::BI__builtin_neon_vceqd_u64: P = llvm::ICmpInst::ICMP_EQ; break;
6131. case NEON::BI__builtin_neon_vcgtd_s64: P = llvm::ICmpInst::ICMP_SGT; break;
6132. case NEON::BI__builtin_neon_vcgtd_u64: P = llvm::ICmpInst::ICMP_UGT; break;
6133. case NEON::BI__builtin_neon_vcltd_s64: P = llvm::ICmpInst::ICMP_SLT; break;
6134. case NEON::BI__builtin_neon_vcltd_u64: P = llvm::ICmpInst::ICMP_ULT; break;
6135. case NEON::BI__builtin_neon_vcged_u64: P = llvm::ICmpInst::ICMP_UGE; break;
6136. case NEON::BI__builtin_neon_vcged_s64: P = llvm::ICmpInst::ICMP_SGE; break;
6137. case NEON::BI__builtin_neon_vcled_u64: P = llvm::ICmpInst::ICMP_ULE; break;
6138. case NEON::BI__builtin_neon_vcled_s64: P = llvm::ICmpInst::ICMP_SLE; break;
  6139. }
  6140. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6141. Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
  6142. Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
  6143. Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
  6144. return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
  6145. }
  6146. case NEON::BI__builtin_neon_vtstd_s64:
  6147. case NEON::BI__builtin_neon_vtstd_u64: {
  6148. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6149. Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
  6150. Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
  6151. Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
  6152. Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
  6153. llvm::Constant::getNullValue(Int64Ty));
  6154. return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
  6155. }
  6156. case NEON::BI__builtin_neon_vset_lane_i8:
  6157. case NEON::BI__builtin_neon_vset_lane_i16:
  6158. case NEON::BI__builtin_neon_vset_lane_i32:
  6159. case NEON::BI__builtin_neon_vset_lane_i64:
  6160. case NEON::BI__builtin_neon_vset_lane_f32:
  6161. case NEON::BI__builtin_neon_vsetq_lane_i8:
  6162. case NEON::BI__builtin_neon_vsetq_lane_i16:
  6163. case NEON::BI__builtin_neon_vsetq_lane_i32:
  6164. case NEON::BI__builtin_neon_vsetq_lane_i64:
  6165. case NEON::BI__builtin_neon_vsetq_lane_f32:
  6166. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  6167. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  6168. case NEON::BI__builtin_neon_vset_lane_f64:
  6169. // The vector type needs a cast for the v1f64 variant.
  6170. Ops[1] = Builder.CreateBitCast(Ops[1],
  6171. llvm::VectorType::get(DoubleTy, 1));
  6172. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  6173. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  6174. case NEON::BI__builtin_neon_vsetq_lane_f64:
  6175. // The vector type needs a cast for the v2f64 variant.
  6176. Ops[1] = Builder.CreateBitCast(Ops[1],
  6177. llvm::VectorType::get(DoubleTy, 2));
  6178. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  6179. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
  6180. case NEON::BI__builtin_neon_vget_lane_i8:
  6181. case NEON::BI__builtin_neon_vdupb_lane_i8:
  6182. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
  6183. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6184. "vget_lane");
  6185. case NEON::BI__builtin_neon_vgetq_lane_i8:
  6186. case NEON::BI__builtin_neon_vdupb_laneq_i8:
  6187. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
  6188. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6189. "vgetq_lane");
  6190. case NEON::BI__builtin_neon_vget_lane_i16:
  6191. case NEON::BI__builtin_neon_vduph_lane_i16:
  6192. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
  6193. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6194. "vget_lane");
  6195. case NEON::BI__builtin_neon_vgetq_lane_i16:
  6196. case NEON::BI__builtin_neon_vduph_laneq_i16:
  6197. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
  6198. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6199. "vgetq_lane");
  6200. case NEON::BI__builtin_neon_vget_lane_i32:
  6201. case NEON::BI__builtin_neon_vdups_lane_i32:
  6202. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
  6203. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6204. "vget_lane");
  6205. case NEON::BI__builtin_neon_vdups_lane_f32:
  6206. Ops[0] = Builder.CreateBitCast(Ops[0],
  6207. llvm::VectorType::get(FloatTy, 2));
  6208. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6209. "vdups_lane");
  6210. case NEON::BI__builtin_neon_vgetq_lane_i32:
  6211. case NEON::BI__builtin_neon_vdups_laneq_i32:
  6212. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
  6213. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6214. "vgetq_lane");
  6215. case NEON::BI__builtin_neon_vget_lane_i64:
  6216. case NEON::BI__builtin_neon_vdupd_lane_i64:
  6217. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
  6218. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6219. "vget_lane");
  6220. case NEON::BI__builtin_neon_vdupd_lane_f64:
  6221. Ops[0] = Builder.CreateBitCast(Ops[0],
  6222. llvm::VectorType::get(DoubleTy, 1));
  6223. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6224. "vdupd_lane");
  6225. case NEON::BI__builtin_neon_vgetq_lane_i64:
  6226. case NEON::BI__builtin_neon_vdupd_laneq_i64:
  6227. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
  6228. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6229. "vgetq_lane");
  6230. case NEON::BI__builtin_neon_vget_lane_f32:
  6231. Ops[0] = Builder.CreateBitCast(Ops[0],
  6232. llvm::VectorType::get(FloatTy, 2));
  6233. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6234. "vget_lane");
  6235. case NEON::BI__builtin_neon_vget_lane_f64:
  6236. Ops[0] = Builder.CreateBitCast(Ops[0],
  6237. llvm::VectorType::get(DoubleTy, 1));
  6238. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6239. "vget_lane");
  6240. case NEON::BI__builtin_neon_vgetq_lane_f32:
  6241. case NEON::BI__builtin_neon_vdups_laneq_f32:
  6242. Ops[0] = Builder.CreateBitCast(Ops[0],
  6243. llvm::VectorType::get(FloatTy, 4));
  6244. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6245. "vgetq_lane");
  6246. case NEON::BI__builtin_neon_vgetq_lane_f64:
  6247. case NEON::BI__builtin_neon_vdupd_laneq_f64:
  6248. Ops[0] = Builder.CreateBitCast(Ops[0],
  6249. llvm::VectorType::get(DoubleTy, 2));
  6250. return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
  6251. "vgetq_lane");
  6252. case NEON::BI__builtin_neon_vaddh_f16:
  6253. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6254. return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
  6255. case NEON::BI__builtin_neon_vsubh_f16:
  6256. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6257. return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
  6258. case NEON::BI__builtin_neon_vmulh_f16:
  6259. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6260. return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
  6261. case NEON::BI__builtin_neon_vdivh_f16:
  6262. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6263. return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
  6264. case NEON::BI__builtin_neon_vfmah_f16: {
  6265. Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
  6266. // NEON intrinsic puts accumulator first, unlike the LLVM fma.
  6267. return Builder.CreateCall(F,
  6268. {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
  6269. }
  6270. case NEON::BI__builtin_neon_vfmsh_f16: {
  6271. Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
  6272. Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
  6273. Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
  6274. // NEON intrinsic puts accumulator first, unlike the LLVM fma.
  6275. return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
  6276. }
  6277. case NEON::BI__builtin_neon_vaddd_s64:
  6278. case NEON::BI__builtin_neon_vaddd_u64:
  6279. return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
  6280. case NEON::BI__builtin_neon_vsubd_s64:
  6281. case NEON::BI__builtin_neon_vsubd_u64:
  6282. return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
  6283. case NEON::BI__builtin_neon_vqdmlalh_s16:
  6284. case NEON::BI__builtin_neon_vqdmlslh_s16: {
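// Note: there is no scalar i16 saturating-doubling-multiply intrinsic, so
// the operands are wrapped into v4i16 vectors, sqdmull yields a v4i32, and
// lane 0 is extracted before the scalar saturating add/subtract.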
  6285. SmallVector<Value *, 2> ProductOps;
  6286. ProductOps.push_back(vectorWrapScalar16(Ops[1]));
  6287. ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
  6288. llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
  6289. Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
  6290. ProductOps, "vqdmlXl");
  6291. Constant *CI = ConstantInt::get(SizeTy, 0);
  6292. Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
  6293. unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
  6294. ? Intrinsic::aarch64_neon_sqadd
  6295. : Intrinsic::aarch64_neon_sqsub;
  6296. return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  6297. }
  6298. case NEON::BI__builtin_neon_vqshlud_n_s64: {
  6299. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6300. Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
  6301. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
  6302. Ops, "vqshlu_n");
  6303. }
  6304. case NEON::BI__builtin_neon_vqshld_n_u64:
  6305. case NEON::BI__builtin_neon_vqshld_n_s64: {
  6306. unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
  6307. ? Intrinsic::aarch64_neon_uqshl
  6308. : Intrinsic::aarch64_neon_sqshl;
  6309. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6310. Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
  6311. return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  6312. }
  6313. case NEON::BI__builtin_neon_vrshrd_n_u64:
  6314. case NEON::BI__builtin_neon_vrshrd_n_s64: {
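// Note: the rounding right shift is emitted as the rounding *left* shift
// intrinsic with the (constant) shift amount negated.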
  6315. unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
  6316. ? Intrinsic::aarch64_neon_urshl
  6317. : Intrinsic::aarch64_neon_srshl;
  6318. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6319. int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
  6320. Ops[1] = ConstantInt::get(Int64Ty, -SV);
  6321. return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  6322. }
  6323. case NEON::BI__builtin_neon_vrsrad_n_u64:
  6324. case NEON::BI__builtin_neon_vrsrad_n_s64: {
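// Note: rounding shift-right-and-accumulate is emitted as the rounding left
// shift intrinsic with a negated shift amount, then added to the accumulator
// in Ops[0].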
  6325. unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
  6326. ? Intrinsic::aarch64_neon_urshl
  6327. : Intrinsic::aarch64_neon_srshl;
  6328. Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
  6329. Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
  6330. Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
  6331. {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
  6332. return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  6333. }
  6334. case NEON::BI__builtin_neon_vshld_n_s64:
  6335. case NEON::BI__builtin_neon_vshld_n_u64: {
  6336. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  6337. return Builder.CreateShl(
  6338. Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  6339. }
  6340. case NEON::BI__builtin_neon_vshrd_n_s64: {
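// Note: an arithmetic right shift of an i64 by 64 is not valid LLVM IR;
// clamping the amount to 63 still gives the expected result (every bit
// becomes a copy of the sign bit).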
  6341. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  6342. return Builder.CreateAShr(
  6343. Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
  6344. Amt->getZExtValue())),
  6345. "shrd_n");
  6346. }
  6347. case NEON::BI__builtin_neon_vshrd_n_u64: {
  6348. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
  6349. uint64_t ShiftAmt = Amt->getZExtValue();
  6350. // Right-shifting an unsigned value by its size yields 0.
  6351. if (ShiftAmt == 64)
  6352. return ConstantInt::get(Int64Ty, 0);
  6353. return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
  6354. "shrd_n");
  6355. }
  6356. case NEON::BI__builtin_neon_vsrad_n_s64: {
  6357. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
  6358. Ops[1] = Builder.CreateAShr(
  6359. Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
  6360. Amt->getZExtValue())),
  6361. "shrd_n");
  6362. return Builder.CreateAdd(Ops[0], Ops[1]);
  6363. }
  6364. case NEON::BI__builtin_neon_vsrad_n_u64: {
  6365. llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
  6366. uint64_t ShiftAmt = Amt->getZExtValue();
  6367. // Right-shifting an unsigned value by its size yields 0.
  6368. // As Op + 0 = Op, return Ops[0] directly.
  6369. if (ShiftAmt == 64)
  6370. return Ops[0];
  6371. Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
  6372. "shrd_n");
  6373. return Builder.CreateAdd(Ops[0], Ops[1]);
  6374. }
  6375. case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
  6376. case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
  6377. case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
  6378. case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
  6379. Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
  6380. "lane");
  6381. SmallVector<Value *, 2> ProductOps;
  6382. ProductOps.push_back(vectorWrapScalar16(Ops[1]));
  6383. ProductOps.push_back(vectorWrapScalar16(Ops[2]));
  6384. llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
  6385. Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
  6386. ProductOps, "vqdmlXl");
  6387. Constant *CI = ConstantInt::get(SizeTy, 0);
  6388. Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
  6389. Ops.pop_back();
  6390. unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
  6391. BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
  6392. ? Intrinsic::aarch64_neon_sqadd
  6393. : Intrinsic::aarch64_neon_sqsub;
  6394. return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
  6395. }
  6396. case NEON::BI__builtin_neon_vqdmlals_s32:
  6397. case NEON::BI__builtin_neon_vqdmlsls_s32: {
  6398. SmallVector<Value *, 2> ProductOps;
  6399. ProductOps.push_back(Ops[1]);
  6400. ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
  6401. Ops[1] =
  6402. EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
  6403. ProductOps, "vqdmlXl");
  6404. unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
  6405. ? Intrinsic::aarch64_neon_sqadd
  6406. : Intrinsic::aarch64_neon_sqsub;
  6407. return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
  6408. }
  6409. case NEON::BI__builtin_neon_vqdmlals_lane_s32:
  6410. case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
  6411. case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
  6412. case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
  6413. Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
  6414. "lane");
  6415. SmallVector<Value *, 2> ProductOps;
  6416. ProductOps.push_back(Ops[1]);
  6417. ProductOps.push_back(Ops[2]);
  6418. Ops[1] =
  6419. EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
  6420. ProductOps, "vqdmlXl");
  6421. Ops.pop_back();
  6422. unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
  6423. BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
  6424. ? Intrinsic::aarch64_neon_sqadd
  6425. : Intrinsic::aarch64_neon_sqsub;
  6426. return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
  6427. }
  6428. }
  6429. llvm::VectorType *VTy = GetNeonType(this, Type);
  6430. llvm::Type *Ty = VTy;
  6431. if (!Ty)
  6432. return nullptr;
  6433. // Not all intrinsics handled by the common case work for AArch64 yet, so only
  6434. // defer to common code if it's been added to our special map.
  6435. Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
  6436. AArch64SIMDIntrinsicsProvenSorted);
  6437. if (Builtin)
  6438. return EmitCommonNeonBuiltinExpr(
  6439. Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
  6440. Builtin->NameHint, Builtin->TypeModifier, E, Ops,
  6441. /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
  6442. if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
  6443. return V;
  6444. unsigned Int;
  6445. switch (BuiltinID) {
  6446. default: return nullptr;
  6447. case NEON::BI__builtin_neon_vbsl_v:
  6448. case NEON::BI__builtin_neon_vbslq_v: {
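// Note: bitwise select is expanded on the equivalent integer vector type as
// (mask & a) | (~mask & b) and bitcast back to the original type.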
  6449. llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
  6450. Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
  6451. Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
  6452. Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
  6453. Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
  6454. Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
  6455. Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
  6456. return Builder.CreateBitCast(Ops[0], Ty);
  6457. }
  6458. case NEON::BI__builtin_neon_vfma_lane_v:
  6459. case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
  6460. // The ARM builtins (and instructions) have the addend as the first
  6461. // operand, but the 'fma' intrinsics have it last. Swap it around here.
  6462. Value *Addend = Ops[0];
  6463. Value *Multiplicand = Ops[1];
  6464. Value *LaneSource = Ops[2];
  6465. Ops[0] = Multiplicand;
  6466. Ops[1] = LaneSource;
  6467. Ops[2] = Addend;
  6468. // Now adjust things to handle the lane access.
  6469. llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
  6470. llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
  6471. VTy;
  6472. llvm::Constant *cst = cast<Constant>(Ops[3]);
  6473. Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
  6474. Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
  6475. Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
  6476. Ops.pop_back();
  6477. Int = Intrinsic::fma;
  6478. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
  6479. }
  6480. case NEON::BI__builtin_neon_vfma_laneq_v: {
  6481. llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
  6482. // v1f64 fma should be mapped to Neon scalar f64 fma
  6483. if (VTy && VTy->getElementType() == DoubleTy) {
  6484. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  6485. Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
  6486. llvm::Type *VTy = GetNeonType(this,
  6487. NeonTypeFlags(NeonTypeFlags::Float64, false, true));
  6488. Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
  6489. Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
  6490. Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
  6491. Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  6492. return Builder.CreateBitCast(Result, Ty);
  6493. }
  6494. Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
  6495. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6496. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  6497. llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
  6498. VTy->getNumElements() * 2);
  6499. Ops[2] = Builder.CreateBitCast(Ops[2], STy);
  6500. Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
  6501. cast<ConstantInt>(Ops[3]));
  6502. Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
  6503. return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
  6504. }
  6505. case NEON::BI__builtin_neon_vfmaq_laneq_v: {
  6506. Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
  6507. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6508. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  6509. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  6510. Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
  6511. return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
  6512. }
  6513. case NEON::BI__builtin_neon_vfmah_lane_f16:
  6514. case NEON::BI__builtin_neon_vfmas_lane_f32:
  6515. case NEON::BI__builtin_neon_vfmah_laneq_f16:
  6516. case NEON::BI__builtin_neon_vfmas_laneq_f32:
  6517. case NEON::BI__builtin_neon_vfmad_lane_f64:
  6518. case NEON::BI__builtin_neon_vfmad_laneq_f64: {
  6519. Ops.push_back(EmitScalarExpr(E->getArg(3)));
  6520. llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
  6521. Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
  6522. Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
  6523. return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  6524. }
  6525. case NEON::BI__builtin_neon_vmull_v:
  6526. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  6527. Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
  6528. if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
  6529. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  6530. case NEON::BI__builtin_neon_vmax_v:
  6531. case NEON::BI__builtin_neon_vmaxq_v:
  6532. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  6533. Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
  6534. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
  6535. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  6536. case NEON::BI__builtin_neon_vmaxh_f16: {
  6537. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6538. Int = Intrinsic::aarch64_neon_fmax;
  6539. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
  6540. }
  6541. case NEON::BI__builtin_neon_vmin_v:
  6542. case NEON::BI__builtin_neon_vminq_v:
  6543. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  6544. Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
  6545. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
  6546. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  6547. case NEON::BI__builtin_neon_vminh_f16: {
  6548. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6549. Int = Intrinsic::aarch64_neon_fmin;
  6550. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
  6551. }
  6552. case NEON::BI__builtin_neon_vabd_v:
  6553. case NEON::BI__builtin_neon_vabdq_v:
  6554. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  6555. Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
  6556. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
  6557. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  6558. case NEON::BI__builtin_neon_vpadal_v:
  6559. case NEON::BI__builtin_neon_vpadalq_v: {
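// Note: pairwise add-and-accumulate is emitted as a pairwise widening add
// ([us]addlp) of the source vector followed by an ordinary add with the
// accumulator operand.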
  6560. unsigned ArgElts = VTy->getNumElements();
  6561. llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
  6562. unsigned BitWidth = EltTy->getBitWidth();
  6563. llvm::Type *ArgTy = llvm::VectorType::get(
  6564. llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
  6565. llvm::Type* Tys[2] = { VTy, ArgTy };
  6566. Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
  6567. SmallVector<llvm::Value*, 1> TmpOps;
  6568. TmpOps.push_back(Ops[1]);
  6569. Function *F = CGM.getIntrinsic(Int, Tys);
  6570. llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
  6571. llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
  6572. return Builder.CreateAdd(tmp, addend);
  6573. }
  6574. case NEON::BI__builtin_neon_vpmin_v:
  6575. case NEON::BI__builtin_neon_vpminq_v:
  6576. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  6577. Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
  6578. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
  6579. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  6580. case NEON::BI__builtin_neon_vpmax_v:
  6581. case NEON::BI__builtin_neon_vpmaxq_v:
  6582. // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
  6583. Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
  6584. if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
  6585. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
  6586. case NEON::BI__builtin_neon_vminnm_v:
  6587. case NEON::BI__builtin_neon_vminnmq_v:
  6588. Int = Intrinsic::aarch64_neon_fminnm;
  6589. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
  6590. case NEON::BI__builtin_neon_vminnmh_f16:
  6591. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6592. Int = Intrinsic::aarch64_neon_fminnm;
  6593. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
  6594. case NEON::BI__builtin_neon_vmaxnm_v:
  6595. case NEON::BI__builtin_neon_vmaxnmq_v:
  6596. Int = Intrinsic::aarch64_neon_fmaxnm;
  6597. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
  6598. case NEON::BI__builtin_neon_vmaxnmh_f16:
  6599. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6600. Int = Intrinsic::aarch64_neon_fmaxnm;
  6601. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
  6602. case NEON::BI__builtin_neon_vrecpss_f32: {
  6603. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6604. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
  6605. Ops, "vrecps");
  6606. }
  6607. case NEON::BI__builtin_neon_vrecpsd_f64:
  6608. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6609. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
  6610. Ops, "vrecps");
  6611. case NEON::BI__builtin_neon_vrecpsh_f16:
  6612. Ops.push_back(EmitScalarExpr(E->getArg(1)));
  6613. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
  6614. Ops, "vrecps");
  6615. case NEON::BI__builtin_neon_vqshrun_n_v:
  6616. Int = Intrinsic::aarch64_neon_sqshrun;
  6617. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
  6618. case NEON::BI__builtin_neon_vqrshrun_n_v:
  6619. Int = Intrinsic::aarch64_neon_sqrshrun;
  6620. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
  6621. case NEON::BI__builtin_neon_vqshrn_n_v:
  6622. Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
  6623. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
  6624. case NEON::BI__builtin_neon_vrshrn_n_v:
  6625. Int = Intrinsic::aarch64_neon_rshrn;
  6626. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
  6627. case NEON::BI__builtin_neon_vqrshrn_n_v:
  6628. Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
  6629. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
  6630. case NEON::BI__builtin_neon_vrndah_f16: {
  6631. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6632. Int = Intrinsic::round;
  6633. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
  6634. }
  6635. case NEON::BI__builtin_neon_vrnda_v:
  6636. case NEON::BI__builtin_neon_vrndaq_v: {
  6637. Int = Intrinsic::round;
  6638. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
  6639. }
  6640. case NEON::BI__builtin_neon_vrndih_f16: {
  6641. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6642. Int = Intrinsic::nearbyint;
  6643. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
  6644. }
  6645. case NEON::BI__builtin_neon_vrndi_v:
  6646. case NEON::BI__builtin_neon_vrndiq_v: {
  6647. Int = Intrinsic::nearbyint;
  6648. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
  6649. }
  6650. case NEON::BI__builtin_neon_vrndmh_f16: {
  6651. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6652. Int = Intrinsic::floor;
  6653. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
  6654. }
  6655. case NEON::BI__builtin_neon_vrndm_v:
  6656. case NEON::BI__builtin_neon_vrndmq_v: {
  6657. Int = Intrinsic::floor;
  6658. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
  6659. }
  6660. case NEON::BI__builtin_neon_vrndnh_f16: {
  6661. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6662. Int = Intrinsic::aarch64_neon_frintn;
  6663. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
  6664. }
  6665. case NEON::BI__builtin_neon_vrndn_v:
  6666. case NEON::BI__builtin_neon_vrndnq_v: {
  6667. Int = Intrinsic::aarch64_neon_frintn;
  6668. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
  6669. }
  6670. case NEON::BI__builtin_neon_vrndph_f16: {
  6671. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6672. Int = Intrinsic::ceil;
  6673. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
  6674. }
  6675. case NEON::BI__builtin_neon_vrndp_v:
  6676. case NEON::BI__builtin_neon_vrndpq_v: {
  6677. Int = Intrinsic::ceil;
  6678. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
  6679. }
  6680. case NEON::BI__builtin_neon_vrndxh_f16: {
  6681. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6682. Int = Intrinsic::rint;
  6683. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
  6684. }
  6685. case NEON::BI__builtin_neon_vrndx_v:
  6686. case NEON::BI__builtin_neon_vrndxq_v: {
  6687. Int = Intrinsic::rint;
  6688. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
  6689. }
  6690. case NEON::BI__builtin_neon_vrndh_f16: {
  6691. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6692. Int = Intrinsic::trunc;
  6693. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
  6694. }
  6695. case NEON::BI__builtin_neon_vrnd_v:
  6696. case NEON::BI__builtin_neon_vrndq_v: {
  6697. Int = Intrinsic::trunc;
  6698. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
  6699. }
  6700. case NEON::BI__builtin_neon_vcvt_f64_v:
  6701. case NEON::BI__builtin_neon_vcvtq_f64_v:
  6702. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6703. Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
  6704. return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
  6705. : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  6706. case NEON::BI__builtin_neon_vcvt_f64_f32: {
  6707. assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
  6708. "unexpected vcvt_f64_f32 builtin");
  6709. NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
  6710. Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
  6711. return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
  6712. }
  6713. case NEON::BI__builtin_neon_vcvt_f32_f64: {
  6714. assert(Type.getEltType() == NeonTypeFlags::Float32 &&
  6715. "unexpected vcvt_f32_f64 builtin");
  6716. NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
  6717. Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
  6718. return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
  6719. }
  6720. case NEON::BI__builtin_neon_vcvt_s32_v:
  6721. case NEON::BI__builtin_neon_vcvt_u32_v:
  6722. case NEON::BI__builtin_neon_vcvt_s64_v:
  6723. case NEON::BI__builtin_neon_vcvt_u64_v:
  6724. case NEON::BI__builtin_neon_vcvt_s16_v:
  6725. case NEON::BI__builtin_neon_vcvt_u16_v:
  6726. case NEON::BI__builtin_neon_vcvtq_s32_v:
  6727. case NEON::BI__builtin_neon_vcvtq_u32_v:
  6728. case NEON::BI__builtin_neon_vcvtq_s64_v:
  6729. case NEON::BI__builtin_neon_vcvtq_u64_v:
  6730. case NEON::BI__builtin_neon_vcvtq_s16_v:
  6731. case NEON::BI__builtin_neon_vcvtq_u16_v: {
  6732. Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
  6733. if (usgn)
  6734. return Builder.CreateFPToUI(Ops[0], Ty);
  6735. return Builder.CreateFPToSI(Ops[0], Ty);
  6736. }
  6737. case NEON::BI__builtin_neon_vcvta_s16_v:
  6738. case NEON::BI__builtin_neon_vcvta_s32_v:
  6739. case NEON::BI__builtin_neon_vcvtaq_s16_v:
  6740. case NEON::BI__builtin_neon_vcvtaq_s32_v:
  6741. case NEON::BI__builtin_neon_vcvta_u32_v:
  6742. case NEON::BI__builtin_neon_vcvtaq_u16_v:
  6743. case NEON::BI__builtin_neon_vcvtaq_u32_v:
  6744. case NEON::BI__builtin_neon_vcvta_s64_v:
  6745. case NEON::BI__builtin_neon_vcvtaq_s64_v:
  6746. case NEON::BI__builtin_neon_vcvta_u64_v:
  6747. case NEON::BI__builtin_neon_vcvtaq_u64_v: {
  6748. Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
  6749. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  6750. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
  6751. }
  6752. case NEON::BI__builtin_neon_vcvtm_s16_v:
  6753. case NEON::BI__builtin_neon_vcvtm_s32_v:
  6754. case NEON::BI__builtin_neon_vcvtmq_s16_v:
  6755. case NEON::BI__builtin_neon_vcvtmq_s32_v:
  6756. case NEON::BI__builtin_neon_vcvtm_u16_v:
  6757. case NEON::BI__builtin_neon_vcvtm_u32_v:
  6758. case NEON::BI__builtin_neon_vcvtmq_u16_v:
  6759. case NEON::BI__builtin_neon_vcvtmq_u32_v:
  6760. case NEON::BI__builtin_neon_vcvtm_s64_v:
  6761. case NEON::BI__builtin_neon_vcvtmq_s64_v:
  6762. case NEON::BI__builtin_neon_vcvtm_u64_v:
  6763. case NEON::BI__builtin_neon_vcvtmq_u64_v: {
  6764. Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
  6765. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  6766. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
  6767. }
  6768. case NEON::BI__builtin_neon_vcvtn_s16_v:
  6769. case NEON::BI__builtin_neon_vcvtn_s32_v:
  6770. case NEON::BI__builtin_neon_vcvtnq_s16_v:
  6771. case NEON::BI__builtin_neon_vcvtnq_s32_v:
  6772. case NEON::BI__builtin_neon_vcvtn_u16_v:
  6773. case NEON::BI__builtin_neon_vcvtn_u32_v:
  6774. case NEON::BI__builtin_neon_vcvtnq_u16_v:
  6775. case NEON::BI__builtin_neon_vcvtnq_u32_v:
  6776. case NEON::BI__builtin_neon_vcvtn_s64_v:
  6777. case NEON::BI__builtin_neon_vcvtnq_s64_v:
  6778. case NEON::BI__builtin_neon_vcvtn_u64_v:
  6779. case NEON::BI__builtin_neon_vcvtnq_u64_v: {
  6780. Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
  6781. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  6782. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
  6783. }
  6784. case NEON::BI__builtin_neon_vcvtp_s16_v:
  6785. case NEON::BI__builtin_neon_vcvtp_s32_v:
  6786. case NEON::BI__builtin_neon_vcvtpq_s16_v:
  6787. case NEON::BI__builtin_neon_vcvtpq_s32_v:
  6788. case NEON::BI__builtin_neon_vcvtp_u16_v:
  6789. case NEON::BI__builtin_neon_vcvtp_u32_v:
  6790. case NEON::BI__builtin_neon_vcvtpq_u16_v:
  6791. case NEON::BI__builtin_neon_vcvtpq_u32_v:
  6792. case NEON::BI__builtin_neon_vcvtp_s64_v:
  6793. case NEON::BI__builtin_neon_vcvtpq_s64_v:
  6794. case NEON::BI__builtin_neon_vcvtp_u64_v:
  6795. case NEON::BI__builtin_neon_vcvtpq_u64_v: {
  6796. Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
  6797. llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
  6798. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
  6799. }
  6800. case NEON::BI__builtin_neon_vmulx_v:
  6801. case NEON::BI__builtin_neon_vmulxq_v: {
  6802. Int = Intrinsic::aarch64_neon_fmulx;
  6803. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
  6804. }
  6805. case NEON::BI__builtin_neon_vmulxh_lane_f16:
  6806. case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
  6807. // vmulx_lane should be mapped to Neon scalar mulx after
  6808. // extracting the scalar element
  6809. Ops.push_back(EmitScalarExpr(E->getArg(2)));
  6810. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
  6811. Ops.pop_back();
  6812. Int = Intrinsic::aarch64_neon_fmulx;
  6813. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
  6814. }
  6815. case NEON::BI__builtin_neon_vmul_lane_v:
  6816. case NEON::BI__builtin_neon_vmul_laneq_v: {
  6817. // v1f64 vmul_lane should be mapped to Neon scalar mul lane
  6818. bool Quad = false;
  6819. if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
  6820. Quad = true;
  6821. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  6822. llvm::Type *VTy = GetNeonType(this,
  6823. NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
  6824. Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
  6825. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
  6826. Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
  6827. return Builder.CreateBitCast(Result, Ty);
  6828. }
  6829. case NEON::BI__builtin_neon_vnegd_s64:
  6830. return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
  6831. case NEON::BI__builtin_neon_vnegh_f16:
  6832. return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
  6833. case NEON::BI__builtin_neon_vpmaxnm_v:
  6834. case NEON::BI__builtin_neon_vpmaxnmq_v: {
  6835. Int = Intrinsic::aarch64_neon_fmaxnmp;
  6836. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
  6837. }
  6838. case NEON::BI__builtin_neon_vpminnm_v:
  6839. case NEON::BI__builtin_neon_vpminnmq_v: {
  6840. Int = Intrinsic::aarch64_neon_fminnmp;
  6841. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
  6842. }
  6843. case NEON::BI__builtin_neon_vsqrth_f16: {
  6844. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6845. Int = Intrinsic::sqrt;
  6846. return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
  6847. }
  6848. case NEON::BI__builtin_neon_vsqrt_v:
  6849. case NEON::BI__builtin_neon_vsqrtq_v: {
  6850. Int = Intrinsic::sqrt;
  6851. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  6852. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
  6853. }
  6854. case NEON::BI__builtin_neon_vrbit_v:
  6855. case NEON::BI__builtin_neon_vrbitq_v: {
  6856. Int = Intrinsic::aarch64_neon_rbit;
  6857. return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
  6858. }
  6859. case NEON::BI__builtin_neon_vaddv_u8:
  6860. // FIXME: These are handled by the AArch64 scalar code.
  6861. usgn = true;
  6862. LLVM_FALLTHROUGH;
  6863. case NEON::BI__builtin_neon_vaddv_s8: {
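// Note: the across-vector add intrinsic is declared to return i32, so the
// result is truncated back to the 8-bit element type.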
  6864. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  6865. Ty = Int32Ty;
  6866. VTy = llvm::VectorType::get(Int8Ty, 8);
  6867. llvm::Type *Tys[2] = { Ty, VTy };
  6868. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6869. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  6870. return Builder.CreateTrunc(Ops[0], Int8Ty);
  6871. }
  6872. case NEON::BI__builtin_neon_vaddv_u16:
  6873. usgn = true;
  6874. LLVM_FALLTHROUGH;
  6875. case NEON::BI__builtin_neon_vaddv_s16: {
  6876. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  6877. Ty = Int32Ty;
  6878. VTy = llvm::VectorType::get(Int16Ty, 4);
  6879. llvm::Type *Tys[2] = { Ty, VTy };
  6880. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6881. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  6882. return Builder.CreateTrunc(Ops[0], Int16Ty);
  6883. }
  6884. case NEON::BI__builtin_neon_vaddvq_u8:
  6885. usgn = true;
  6886. LLVM_FALLTHROUGH;
  6887. case NEON::BI__builtin_neon_vaddvq_s8: {
  6888. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  6889. Ty = Int32Ty;
  6890. VTy = llvm::VectorType::get(Int8Ty, 16);
  6891. llvm::Type *Tys[2] = { Ty, VTy };
  6892. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6893. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  6894. return Builder.CreateTrunc(Ops[0], Int8Ty);
  6895. }
  6896. case NEON::BI__builtin_neon_vaddvq_u16:
  6897. usgn = true;
  6898. LLVM_FALLTHROUGH;
  6899. case NEON::BI__builtin_neon_vaddvq_s16: {
  6900. Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
  6901. Ty = Int32Ty;
  6902. VTy = llvm::VectorType::get(Int16Ty, 8);
  6903. llvm::Type *Tys[2] = { Ty, VTy };
  6904. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6905. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
  6906. return Builder.CreateTrunc(Ops[0], Int16Ty);
  6907. }
  6908. case NEON::BI__builtin_neon_vmaxv_u8: {
  6909. Int = Intrinsic::aarch64_neon_umaxv;
  6910. Ty = Int32Ty;
  6911. VTy = llvm::VectorType::get(Int8Ty, 8);
  6912. llvm::Type *Tys[2] = { Ty, VTy };
  6913. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6914. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6915. return Builder.CreateTrunc(Ops[0], Int8Ty);
  6916. }
  6917. case NEON::BI__builtin_neon_vmaxv_u16: {
  6918. Int = Intrinsic::aarch64_neon_umaxv;
  6919. Ty = Int32Ty;
  6920. VTy = llvm::VectorType::get(Int16Ty, 4);
  6921. llvm::Type *Tys[2] = { Ty, VTy };
  6922. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6923. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6924. return Builder.CreateTrunc(Ops[0], Int16Ty);
  6925. }
  6926. case NEON::BI__builtin_neon_vmaxvq_u8: {
  6927. Int = Intrinsic::aarch64_neon_umaxv;
  6928. Ty = Int32Ty;
  6929. VTy = llvm::VectorType::get(Int8Ty, 16);
  6930. llvm::Type *Tys[2] = { Ty, VTy };
  6931. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6932. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6933. return Builder.CreateTrunc(Ops[0], Int8Ty);
  6934. }
  6935. case NEON::BI__builtin_neon_vmaxvq_u16: {
  6936. Int = Intrinsic::aarch64_neon_umaxv;
  6937. Ty = Int32Ty;
  6938. VTy = llvm::VectorType::get(Int16Ty, 8);
  6939. llvm::Type *Tys[2] = { Ty, VTy };
  6940. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6941. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6942. return Builder.CreateTrunc(Ops[0], Int16Ty);
  6943. }
  6944. case NEON::BI__builtin_neon_vmaxv_s8: {
  6945. Int = Intrinsic::aarch64_neon_smaxv;
  6946. Ty = Int32Ty;
  6947. VTy = llvm::VectorType::get(Int8Ty, 8);
  6948. llvm::Type *Tys[2] = { Ty, VTy };
  6949. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6950. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6951. return Builder.CreateTrunc(Ops[0], Int8Ty);
  6952. }
  6953. case NEON::BI__builtin_neon_vmaxv_s16: {
  6954. Int = Intrinsic::aarch64_neon_smaxv;
  6955. Ty = Int32Ty;
  6956. VTy = llvm::VectorType::get(Int16Ty, 4);
  6957. llvm::Type *Tys[2] = { Ty, VTy };
  6958. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6959. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6960. return Builder.CreateTrunc(Ops[0], Int16Ty);
  6961. }
  6962. case NEON::BI__builtin_neon_vmaxvq_s8: {
  6963. Int = Intrinsic::aarch64_neon_smaxv;
  6964. Ty = Int32Ty;
  6965. VTy = llvm::VectorType::get(Int8Ty, 16);
  6966. llvm::Type *Tys[2] = { Ty, VTy };
  6967. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6968. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6969. return Builder.CreateTrunc(Ops[0], Int8Ty);
  6970. }
  6971. case NEON::BI__builtin_neon_vmaxvq_s16: {
  6972. Int = Intrinsic::aarch64_neon_smaxv;
  6973. Ty = Int32Ty;
  6974. VTy = llvm::VectorType::get(Int16Ty, 8);
  6975. llvm::Type *Tys[2] = { Ty, VTy };
  6976. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6977. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6978. return Builder.CreateTrunc(Ops[0], Int16Ty);
  6979. }
  6980. case NEON::BI__builtin_neon_vmaxv_f16: {
  6981. Int = Intrinsic::aarch64_neon_fmaxv;
  6982. Ty = HalfTy;
  6983. VTy = llvm::VectorType::get(HalfTy, 4);
  6984. llvm::Type *Tys[2] = { Ty, VTy };
  6985. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6986. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6987. return Builder.CreateTrunc(Ops[0], HalfTy);
  6988. }
  6989. case NEON::BI__builtin_neon_vmaxvq_f16: {
  6990. Int = Intrinsic::aarch64_neon_fmaxv;
  6991. Ty = HalfTy;
  6992. VTy = llvm::VectorType::get(HalfTy, 8);
  6993. llvm::Type *Tys[2] = { Ty, VTy };
  6994. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  6995. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
  6996. return Builder.CreateTrunc(Ops[0], HalfTy);
  6997. }
  6998. case NEON::BI__builtin_neon_vminv_u8: {
  6999. Int = Intrinsic::aarch64_neon_uminv;
  7000. Ty = Int32Ty;
  7001. VTy = llvm::VectorType::get(Int8Ty, 8);
  7002. llvm::Type *Tys[2] = { Ty, VTy };
  7003. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7004. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7005. return Builder.CreateTrunc(Ops[0], Int8Ty);
  7006. }
  7007. case NEON::BI__builtin_neon_vminv_u16: {
  7008. Int = Intrinsic::aarch64_neon_uminv;
  7009. Ty = Int32Ty;
  7010. VTy = llvm::VectorType::get(Int16Ty, 4);
  7011. llvm::Type *Tys[2] = { Ty, VTy };
  7012. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7013. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7014. return Builder.CreateTrunc(Ops[0], Int16Ty);
  7015. }
  7016. case NEON::BI__builtin_neon_vminvq_u8: {
  7017. Int = Intrinsic::aarch64_neon_uminv;
  7018. Ty = Int32Ty;
  7019. VTy = llvm::VectorType::get(Int8Ty, 16);
  7020. llvm::Type *Tys[2] = { Ty, VTy };
  7021. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7022. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7023. return Builder.CreateTrunc(Ops[0], Int8Ty);
  7024. }
  7025. case NEON::BI__builtin_neon_vminvq_u16: {
  7026. Int = Intrinsic::aarch64_neon_uminv;
  7027. Ty = Int32Ty;
  7028. VTy = llvm::VectorType::get(Int16Ty, 8);
  7029. llvm::Type *Tys[2] = { Ty, VTy };
  7030. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7031. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7032. return Builder.CreateTrunc(Ops[0], Int16Ty);
  7033. }
  7034. case NEON::BI__builtin_neon_vminv_s8: {
  7035. Int = Intrinsic::aarch64_neon_sminv;
  7036. Ty = Int32Ty;
  7037. VTy = llvm::VectorType::get(Int8Ty, 8);
  7038. llvm::Type *Tys[2] = { Ty, VTy };
  7039. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7040. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7041. return Builder.CreateTrunc(Ops[0], Int8Ty);
  7042. }
  7043. case NEON::BI__builtin_neon_vminv_s16: {
  7044. Int = Intrinsic::aarch64_neon_sminv;
  7045. Ty = Int32Ty;
  7046. VTy = llvm::VectorType::get(Int16Ty, 4);
  7047. llvm::Type *Tys[2] = { Ty, VTy };
  7048. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7049. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7050. return Builder.CreateTrunc(Ops[0], Int16Ty);
  7051. }
  7052. case NEON::BI__builtin_neon_vminvq_s8: {
  7053. Int = Intrinsic::aarch64_neon_sminv;
  7054. Ty = Int32Ty;
  7055. VTy = llvm::VectorType::get(Int8Ty, 16);
  7056. llvm::Type *Tys[2] = { Ty, VTy };
  7057. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7058. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7059. return Builder.CreateTrunc(Ops[0], Int8Ty);
  7060. }
  7061. case NEON::BI__builtin_neon_vminvq_s16: {
  7062. Int = Intrinsic::aarch64_neon_sminv;
  7063. Ty = Int32Ty;
  7064. VTy = llvm::VectorType::get(Int16Ty, 8);
  7065. llvm::Type *Tys[2] = { Ty, VTy };
  7066. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7067. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7068. return Builder.CreateTrunc(Ops[0], Int16Ty);
  7069. }
  7070. case NEON::BI__builtin_neon_vminv_f16: {
  7071. Int = Intrinsic::aarch64_neon_fminv;
  7072. Ty = HalfTy;
  7073. VTy = llvm::VectorType::get(HalfTy, 4);
  7074. llvm::Type *Tys[2] = { Ty, VTy };
  7075. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7076. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7077. return Builder.CreateTrunc(Ops[0], HalfTy);
  7078. }
  7079. case NEON::BI__builtin_neon_vminvq_f16: {
  7080. Int = Intrinsic::aarch64_neon_fminv;
  7081. Ty = HalfTy;
  7082. VTy = llvm::VectorType::get(HalfTy, 8);
  7083. llvm::Type *Tys[2] = { Ty, VTy };
  7084. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7085. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
  7086. return Builder.CreateTrunc(Ops[0], HalfTy);
  7087. }
  7088. case NEON::BI__builtin_neon_vmaxnmv_f16: {
  7089. Int = Intrinsic::aarch64_neon_fmaxnmv;
  7090. Ty = HalfTy;
  7091. VTy = llvm::VectorType::get(HalfTy, 4);
  7092. llvm::Type *Tys[2] = { Ty, VTy };
  7093. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7094. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  7095. return Builder.CreateTrunc(Ops[0], HalfTy);
  7096. }
  7097. case NEON::BI__builtin_neon_vmaxnmvq_f16: {
  7098. Int = Intrinsic::aarch64_neon_fmaxnmv;
  7099. Ty = HalfTy;
  7100. VTy = llvm::VectorType::get(HalfTy, 8);
  7101. llvm::Type *Tys[2] = { Ty, VTy };
  7102. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7103. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
  7104. return Builder.CreateTrunc(Ops[0], HalfTy);
  7105. }
  7106. case NEON::BI__builtin_neon_vminnmv_f16: {
  7107. Int = Intrinsic::aarch64_neon_fminnmv;
  7108. Ty = HalfTy;
  7109. VTy = llvm::VectorType::get(HalfTy, 4);
  7110. llvm::Type *Tys[2] = { Ty, VTy };
  7111. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7112. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  7113. return Builder.CreateTrunc(Ops[0], HalfTy);
  7114. }
  7115. case NEON::BI__builtin_neon_vminnmvq_f16: {
  7116. Int = Intrinsic::aarch64_neon_fminnmv;
  7117. Ty = HalfTy;
  7118. VTy = llvm::VectorType::get(HalfTy, 8);
  7119. llvm::Type *Tys[2] = { Ty, VTy };
  7120. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7121. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
  7122. return Builder.CreateTrunc(Ops[0], HalfTy);
  7123. }
  7124. case NEON::BI__builtin_neon_vmul_n_f64: {
  7125. Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
  7126. Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
  7127. return Builder.CreateFMul(Ops[0], RHS);
  7128. }
  7129. case NEON::BI__builtin_neon_vaddlv_u8: {
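// Note: uaddlv on v8i8 produces a 16-bit sum; the intrinsic returns i32, so
// the result is truncated to i16.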
  7130. Int = Intrinsic::aarch64_neon_uaddlv;
  7131. Ty = Int32Ty;
  7132. VTy = llvm::VectorType::get(Int8Ty, 8);
  7133. llvm::Type *Tys[2] = { Ty, VTy };
  7134. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7135. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  7136. return Builder.CreateTrunc(Ops[0], Int16Ty);
  7137. }
  7138. case NEON::BI__builtin_neon_vaddlv_u16: {
  7139. Int = Intrinsic::aarch64_neon_uaddlv;
  7140. Ty = Int32Ty;
  7141. VTy = llvm::VectorType::get(Int16Ty, 4);
  7142. llvm::Type *Tys[2] = { Ty, VTy };
  7143. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7144. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  7145. }
  7146. case NEON::BI__builtin_neon_vaddlvq_u8: {
  7147. Int = Intrinsic::aarch64_neon_uaddlv;
  7148. Ty = Int32Ty;
  7149. VTy = llvm::VectorType::get(Int8Ty, 16);
  7150. llvm::Type *Tys[2] = { Ty, VTy };
  7151. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7152. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  7153. return Builder.CreateTrunc(Ops[0], Int16Ty);
  7154. }
  7155. case NEON::BI__builtin_neon_vaddlvq_u16: {
  7156. Int = Intrinsic::aarch64_neon_uaddlv;
  7157. Ty = Int32Ty;
  7158. VTy = llvm::VectorType::get(Int16Ty, 8);
  7159. llvm::Type *Tys[2] = { Ty, VTy };
  7160. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7161. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  7162. }
  7163. case NEON::BI__builtin_neon_vaddlv_s8: {
  7164. Int = Intrinsic::aarch64_neon_saddlv;
  7165. Ty = Int32Ty;
  7166. VTy = llvm::VectorType::get(Int8Ty, 8);
  7167. llvm::Type *Tys[2] = { Ty, VTy };
  7168. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7169. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  7170. return Builder.CreateTrunc(Ops[0], Int16Ty);
  7171. }
  7172. case NEON::BI__builtin_neon_vaddlv_s16: {
  7173. Int = Intrinsic::aarch64_neon_saddlv;
  7174. Ty = Int32Ty;
  7175. VTy = llvm::VectorType::get(Int16Ty, 4);
  7176. llvm::Type *Tys[2] = { Ty, VTy };
  7177. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7178. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  7179. }
  7180. case NEON::BI__builtin_neon_vaddlvq_s8: {
  7181. Int = Intrinsic::aarch64_neon_saddlv;
  7182. Ty = Int32Ty;
  7183. VTy = llvm::VectorType::get(Int8Ty, 16);
  7184. llvm::Type *Tys[2] = { Ty, VTy };
  7185. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7186. Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  7187. return Builder.CreateTrunc(Ops[0], Int16Ty);
  7188. }
  7189. case NEON::BI__builtin_neon_vaddlvq_s16: {
  7190. Int = Intrinsic::aarch64_neon_saddlv;
  7191. Ty = Int32Ty;
  7192. VTy = llvm::VectorType::get(Int16Ty, 8);
  7193. llvm::Type *Tys[2] = { Ty, VTy };
  7194. Ops.push_back(EmitScalarExpr(E->getArg(0)));
  7195. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  7196. }
  7197. case NEON::BI__builtin_neon_vsri_n_v:
  7198. case NEON::BI__builtin_neon_vsriq_n_v: {
  7199. Int = Intrinsic::aarch64_neon_vsri;
  7200. llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
  7201. return EmitNeonCall(Intrin, Ops, "vsri_n");
  7202. }
  7203. case NEON::BI__builtin_neon_vsli_n_v:
  7204. case NEON::BI__builtin_neon_vsliq_n_v: {
  7205. Int = Intrinsic::aarch64_neon_vsli;
  7206. llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
  7207. return EmitNeonCall(Intrin, Ops, "vsli_n");
  7208. }
  7209. case NEON::BI__builtin_neon_vsra_n_v:
  7210. case NEON::BI__builtin_neon_vsraq_n_v:
  7211. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7212. Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
  7213. return Builder.CreateAdd(Ops[0], Ops[1]);
  7214. case NEON::BI__builtin_neon_vrsra_n_v:
  7215. case NEON::BI__builtin_neon_vrsraq_n_v: {
  7216. Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
  7217. SmallVector<llvm::Value*,2> TmpOps;
  7218. TmpOps.push_back(Ops[1]);
  7219. TmpOps.push_back(Ops[2]);
  7220. Function* F = CGM.getIntrinsic(Int, Ty);
  7221. llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
  7222. Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
  7223. return Builder.CreateAdd(Ops[0], tmp);
  7224. }
  7225. // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
  7226. // of an Align parameter here.
  7227. case NEON::BI__builtin_neon_vld1_x2_v:
  7228. case NEON::BI__builtin_neon_vld1q_x2_v:
  7229. case NEON::BI__builtin_neon_vld1_x3_v:
  7230. case NEON::BI__builtin_neon_vld1q_x3_v:
  7231. case NEON::BI__builtin_neon_vld1_x4_v:
  7232. case NEON::BI__builtin_neon_vld1q_x4_v: {
  7233. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
  7234. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  7235. llvm::Type *Tys[2] = { VTy, PTy };
  7236. unsigned Int;
  7237. switch (BuiltinID) {
  7238. case NEON::BI__builtin_neon_vld1_x2_v:
  7239. case NEON::BI__builtin_neon_vld1q_x2_v:
  7240. Int = Intrinsic::aarch64_neon_ld1x2;
  7241. break;
  7242. case NEON::BI__builtin_neon_vld1_x3_v:
  7243. case NEON::BI__builtin_neon_vld1q_x3_v:
  7244. Int = Intrinsic::aarch64_neon_ld1x3;
  7245. break;
  7246. case NEON::BI__builtin_neon_vld1_x4_v:
  7247. case NEON::BI__builtin_neon_vld1q_x4_v:
  7248. Int = Intrinsic::aarch64_neon_ld1x4;
  7249. break;
  7250. }
  7251. Function *F = CGM.getIntrinsic(Int, Tys);
  7252. Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
  7253. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  7254. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7255. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7256. }
  7257. case NEON::BI__builtin_neon_vst1_x2_v:
  7258. case NEON::BI__builtin_neon_vst1q_x2_v:
  7259. case NEON::BI__builtin_neon_vst1_x3_v:
  7260. case NEON::BI__builtin_neon_vst1q_x3_v:
  7261. case NEON::BI__builtin_neon_vst1_x4_v:
  7262. case NEON::BI__builtin_neon_vst1q_x4_v: {
  7263. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
  7264. llvm::Type *Tys[2] = { VTy, PTy };
  7265. unsigned Int;
  7266. switch (BuiltinID) {
  7267. case NEON::BI__builtin_neon_vst1_x2_v:
  7268. case NEON::BI__builtin_neon_vst1q_x2_v:
  7269. Int = Intrinsic::aarch64_neon_st1x2;
  7270. break;
  7271. case NEON::BI__builtin_neon_vst1_x3_v:
  7272. case NEON::BI__builtin_neon_vst1q_x3_v:
  7273. Int = Intrinsic::aarch64_neon_st1x3;
  7274. break;
  7275. case NEON::BI__builtin_neon_vst1_x4_v:
  7276. case NEON::BI__builtin_neon_vst1q_x4_v:
  7277. Int = Intrinsic::aarch64_neon_st1x4;
  7278. break;
  7279. }
  7280. std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
  7281. return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  7282. }
  7283. case NEON::BI__builtin_neon_vld1_v:
  7284. case NEON::BI__builtin_neon_vld1q_v: {
  7285. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
  7286. auto Alignment = CharUnits::fromQuantity(
  7287. BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
  7288. return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
  7289. }
  7290. case NEON::BI__builtin_neon_vst1_v:
  7291. case NEON::BI__builtin_neon_vst1q_v:
  7292. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
  7293. Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
  7294. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7295. case NEON::BI__builtin_neon_vld1_lane_v:
  7296. case NEON::BI__builtin_neon_vld1q_lane_v: {
  7297. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7298. Ty = llvm::PointerType::getUnqual(VTy->getElementType());
  7299. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7300. auto Alignment = CharUnits::fromQuantity(
  7301. BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
  7302. Ops[0] =
  7303. Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
  7304. return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  7305. }
  7306. case NEON::BI__builtin_neon_vld1_dup_v:
  7307. case NEON::BI__builtin_neon_vld1q_dup_v: {
  7308. Value *V = UndefValue::get(Ty);
  7309. Ty = llvm::PointerType::getUnqual(VTy->getElementType());
  7310. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7311. auto Alignment = CharUnits::fromQuantity(
  7312. BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
  7313. Ops[0] =
  7314. Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
  7315. llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
  7316. Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
  7317. return EmitNeonSplat(Ops[0], CI);
  7318. }
  7319. case NEON::BI__builtin_neon_vst1_lane_v:
  7320. case NEON::BI__builtin_neon_vst1q_lane_v:
  7321. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7322. Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
  7323. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  7324. return Builder.CreateDefaultAlignedStore(Ops[1],
  7325. Builder.CreateBitCast(Ops[0], Ty));
  7326. case NEON::BI__builtin_neon_vld2_v:
  7327. case NEON::BI__builtin_neon_vld2q_v: {
  7328. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
  7329. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  7330. llvm::Type *Tys[2] = { VTy, PTy };
  7331. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
  7332. Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
  7333. Ops[0] = Builder.CreateBitCast(Ops[0],
  7334. llvm::PointerType::getUnqual(Ops[1]->getType()));
  7335. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7336. }
  7337. case NEON::BI__builtin_neon_vld3_v:
  7338. case NEON::BI__builtin_neon_vld3q_v: {
  7339. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
  7340. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  7341. llvm::Type *Tys[2] = { VTy, PTy };
  7342. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
  7343. Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
  7344. Ops[0] = Builder.CreateBitCast(Ops[0],
  7345. llvm::PointerType::getUnqual(Ops[1]->getType()));
  7346. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7347. }
  7348. case NEON::BI__builtin_neon_vld4_v:
  7349. case NEON::BI__builtin_neon_vld4q_v: {
  7350. llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
  7351. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  7352. llvm::Type *Tys[2] = { VTy, PTy };
  7353. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
  7354. Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
  7355. Ops[0] = Builder.CreateBitCast(Ops[0],
  7356. llvm::PointerType::getUnqual(Ops[1]->getType()));
  7357. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7358. }
  7359. case NEON::BI__builtin_neon_vld2_dup_v:
  7360. case NEON::BI__builtin_neon_vld2q_dup_v: {
  7361. llvm::Type *PTy =
  7362. llvm::PointerType::getUnqual(VTy->getElementType());
  7363. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  7364. llvm::Type *Tys[2] = { VTy, PTy };
  7365. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
  7366. Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
  7367. Ops[0] = Builder.CreateBitCast(Ops[0],
  7368. llvm::PointerType::getUnqual(Ops[1]->getType()));
  7369. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7370. }
  7371. case NEON::BI__builtin_neon_vld3_dup_v:
  7372. case NEON::BI__builtin_neon_vld3q_dup_v: {
  7373. llvm::Type *PTy =
  7374. llvm::PointerType::getUnqual(VTy->getElementType());
  7375. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  7376. llvm::Type *Tys[2] = { VTy, PTy };
  7377. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
  7378. Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
  7379. Ops[0] = Builder.CreateBitCast(Ops[0],
  7380. llvm::PointerType::getUnqual(Ops[1]->getType()));
  7381. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7382. }
  7383. case NEON::BI__builtin_neon_vld4_dup_v:
  7384. case NEON::BI__builtin_neon_vld4q_dup_v: {
  7385. llvm::Type *PTy =
  7386. llvm::PointerType::getUnqual(VTy->getElementType());
  7387. Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
  7388. llvm::Type *Tys[2] = { VTy, PTy };
  7389. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
  7390. Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
  7391. Ops[0] = Builder.CreateBitCast(Ops[0],
  7392. llvm::PointerType::getUnqual(Ops[1]->getType()));
  7393. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7394. }
  7395. case NEON::BI__builtin_neon_vld2_lane_v:
  7396. case NEON::BI__builtin_neon_vld2q_lane_v: {
  7397. llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
  7398. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
  7399. Ops.push_back(Ops[1]);
  7400. Ops.erase(Ops.begin()+1);
  7401. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7402. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  7403. Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
  7404. Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
  7405. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  7406. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7407. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7408. }
  7409. case NEON::BI__builtin_neon_vld3_lane_v:
  7410. case NEON::BI__builtin_neon_vld3q_lane_v: {
  7411. llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
  7412. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
  7413. Ops.push_back(Ops[1]);
  7414. Ops.erase(Ops.begin()+1);
  7415. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7416. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  7417. Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
  7418. Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
  7419. Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
  7420. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  7421. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7422. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7423. }
  7424. case NEON::BI__builtin_neon_vld4_lane_v:
  7425. case NEON::BI__builtin_neon_vld4q_lane_v: {
  7426. llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
  7427. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
  7428. Ops.push_back(Ops[1]);
  7429. Ops.erase(Ops.begin()+1);
  7430. Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
  7431. Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
  7432. Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
  7433. Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
  7434. Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
  7435. Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
  7436. Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
  7437. Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
  7438. return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  7439. }
  7440. case NEON::BI__builtin_neon_vst2_v:
  7441. case NEON::BI__builtin_neon_vst2q_v: {
  7442. Ops.push_back(Ops[0]);
  7443. Ops.erase(Ops.begin());
  7444. llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
  7445. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
  7446. Ops, "");
  7447. }
  7448. case NEON::BI__builtin_neon_vst2_lane_v:
  7449. case NEON::BI__builtin_neon_vst2q_lane_v: {
  7450. Ops.push_back(Ops[0]);
  7451. Ops.erase(Ops.begin());
  7452. Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
  7453. llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
  7454. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
  7455. Ops, "");
  7456. }
  7457. case NEON::BI__builtin_neon_vst3_v:
  7458. case NEON::BI__builtin_neon_vst3q_v: {
  7459. Ops.push_back(Ops[0]);
  7460. Ops.erase(Ops.begin());
  7461. llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
  7462. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
  7463. Ops, "");
  7464. }
  7465. case NEON::BI__builtin_neon_vst3_lane_v:
  7466. case NEON::BI__builtin_neon_vst3q_lane_v: {
  7467. Ops.push_back(Ops[0]);
  7468. Ops.erase(Ops.begin());
  7469. Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
  7470. llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
  7471. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
  7472. Ops, "");
  7473. }
  7474. case NEON::BI__builtin_neon_vst4_v:
  7475. case NEON::BI__builtin_neon_vst4q_v: {
  7476. Ops.push_back(Ops[0]);
  7477. Ops.erase(Ops.begin());
  7478. llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
  7479. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
  7480. Ops, "");
  7481. }
  7482. case NEON::BI__builtin_neon_vst4_lane_v:
  7483. case NEON::BI__builtin_neon_vst4q_lane_v: {
  7484. Ops.push_back(Ops[0]);
  7485. Ops.erase(Ops.begin());
  7486. Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
  7487. llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
  7488. return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
  7489. Ops, "");
  7490. }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
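  // Illustrative note (not in the original source): for a 4-element vector
  // this emits the two transpose shuffles with index sets {0,4,2,6} and
  // {1,5,3,7}, storing each result to the vi'th element of the result pointer.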
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
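  // Illustrative note (not in the original source): for a 4-element vector
  // the unzip shuffles use index sets {0,2,4,6} (even elements) and
  // {1,3,5,7} (odd elements).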
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
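  // Illustrative note (not in the original source): for a 4-element vector
  // the zip shuffles interleave the low and high halves, using index sets
  // {0,4,1,5} and {2,6,3,7}.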
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
  }
}
llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);
  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }
  // Otherwise, insertelement the values to build the vector.
  Value *Result =
      llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
  return Result;
}
// Convert the mask from an integer type to a vector of i1.
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
                              unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
                         cast<IntegerType>(Mask->getType())->getBitWidth());
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
  // If we have fewer than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
                                              makeArrayRef(Indices, NumElts),
                                              "extract");
  }
  return MaskVec;
}
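// Illustrative example (not in the original source): for an i8 mask feeding a
// 4-element operation, the mask is bitcast to <8 x i1> and then shuffled with
// indices {0,1,2,3} down to the <4 x i1> expected by the masked intrinsics.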
static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
                                 SmallVectorImpl<Value *> &Ops,
                                 unsigned Align) {
  // Cast the pointer to the right type.
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                               llvm::PointerType::getUnqual(Ops[1]->getType()));
  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());
  return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
}
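// Rough sketch of the output (illustrative only): with a non-constant mask
// this becomes a call to the llvm.masked.store intrinsic taking the value,
// the pointer, the alignment and an <N x i1> mask; an all-ones constant mask
// degrades to an ordinary aligned store.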
static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
                                SmallVectorImpl<Value *> &Ops, unsigned Align) {
  // Cast the pointer to the right type.
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                               llvm::PointerType::getUnqual(Ops[1]->getType()));
  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());
  return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
}
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
                               unsigned NumElts, SmallVectorImpl<Value *> &Ops,
                               bool InvertLHS = false) {
  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
  if (InvertLHS)
    LHS = CGF.Builder.CreateNot(LHS);
  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
                                   CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
}
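// For example (illustrative only): the 16-bit kand/kor/kxor builtins handled
// below bitcast both i16 masks to <16 x i1>, apply the binary operator, and
// bitcast the result back to an i16.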
static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
                                        SmallVectorImpl<Value *> &Ops,
                                        llvm::Type *DstTy,
                                        unsigned SrcSizeInBits,
                                        unsigned Align) {
  // Load the subvector.
  Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
  // Create broadcast mask.
  unsigned NumDstElts = DstTy->getVectorNumElements();
  unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
  SmallVector<uint32_t, 8> Mask;
  for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
    for (unsigned j = 0; j != NumSrcElts; ++j)
      Mask.push_back(j);
  return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
}
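// Illustrative example (assumption): for vbroadcastf128 into a 256-bit float
// destination, the loaded <4 x float> subvector is repeated with the shuffle
// mask {0,1,2,3,0,1,2,3}.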
static Value *EmitX86Select(CodeGenFunction &CGF,
                            Value *Mask, Value *Op0, Value *Op1) {
  // If the mask is all ones just return the first argument.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;
  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}
static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
                                         unsigned NumElts, Value *MaskIn) {
  if (MaskIn) {
    const auto *C = dyn_cast<Constant>(MaskIn);
    if (!C || !C->isAllOnesValue())
      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
  }
  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = i % NumElts + NumElts;
    Cmp = CGF.Builder.CreateShuffleVector(
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
  }
  return CGF.Builder.CreateBitCast(Cmp,
                                   IntegerType::get(CGF.getLLVMContext(),
                                                    std::max(NumElts, 8U)));
}
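// Sketch (illustrative): a 4-element compare result is widened to <8 x i1> by
// pulling the extra elements from a zero vector, so the resulting i8 mask has
// its upper four bits cleared before the bitcast.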
static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
                                   bool Signed, ArrayRef<Value *> Ops) {
  assert((Ops.size() == 2 || Ops.size() == 4) &&
         "Unexpected number of arguments");
  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
  }
  Value *MaskIn = nullptr;
  if (Ops.size() == 4)
    MaskIn = Ops[3];
  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
}
static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
  Value *Zero = Constant::getNullValue(In->getType());
  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
}
static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
  llvm::Type *Ty = Ops[0]->getType();
  Value *Zero = llvm::Constant::getNullValue(Ty);
  Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
  Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
  if (Ops.size() == 1)
    return Res;
  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
}
static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
                            ArrayRef<Value *> Ops) {
  Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
  if (Ops.size() == 2)
    return Res;
  assert(Ops.size() == 4);
  return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
}
static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
                           ArrayRef<Value *> Ops) {
  llvm::Type *Ty = Ops[0]->getType();
  // Arguments have a vXi32 type so cast to vXi64.
  Ty = llvm::VectorType::get(CGF.Int64Ty,
                             Ty->getPrimitiveSizeInBits() / 64);
  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
    LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
    RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
    RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = CGF.Builder.CreateAnd(LHS, Mask);
    RHS = CGF.Builder.CreateAnd(RHS, Mask);
  }
  return CGF.Builder.CreateMul(LHS, RHS);
}
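// Design note (an observation, not from the original source): no target
// intrinsic is emitted here; sign- or zero-extending the low 32 bits of each
// i64 lane and using a plain mul presumably lets the backend pattern-match
// the pmuldq/pmuludq instructions.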
static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
                              llvm::Type *DstTy) {
  unsigned NumberOfElements = DstTy->getVectorNumElements();
  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
  return EmitX86CpuIs(CPUStr);
}
Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
  llvm::Type *Int32Ty = Builder.getInt32Ty();
  // Matching the struct layout from the compiler-rt/libgcc structure that is
  // filled in:
  //   unsigned int __cpu_vendor;
  //   unsigned int __cpu_type;
  //   unsigned int __cpu_subtype;
  //   unsigned int __cpu_features[1];
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                          llvm::ArrayType::get(Int32Ty, 1));
  // Grab the global __cpu_model.
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
  // Calculate the index needed to access the correct field based on the
  // range. Also adjust the expected value.
  unsigned Index;
  unsigned Value;
  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
#define X86_VENDOR(ENUM, STRING)                                               \
  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS)             \
  .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR)                               \
  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR)                            \
  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
#include "llvm/Support/X86TargetParser.def"
                               .Default({0, 0});
  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
  // Grab the appropriate field from __cpu_model.
  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
                         ConstantInt::get(Int32Ty, Index)};
  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
  CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
  // Check the value of the field against the requested value.
  return Builder.CreateICmpEQ(CpuValue,
                              llvm::ConstantInt::get(Int32Ty, Value));
}
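// Illustrative example (assumption): __builtin_cpu_is("amd") checks the
// __cpu_vendor field (Index 0), while CPU-name strings select the __cpu_type
// or __cpu_subtype field; the chosen field is loaded from __cpu_model and
// compared for equality against the matched enum value.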
Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
  return EmitX86CpuSupports(FeatureStr);
}
Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
  // Map each named processor feature to its bit in the feature mask.
  uint32_t FeaturesMask = 0;
  for (const StringRef &FeatureStr : FeatureStrs) {
    unsigned Feature =
        StringSwitch<unsigned>(FeatureStr)
#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
#include "llvm/Support/X86TargetParser.def"
        ;
    FeaturesMask |= (1U << Feature);
  }
  // Matching the struct layout from the compiler-rt/libgcc structure that is
  // filled in:
  //   unsigned int __cpu_vendor;
  //   unsigned int __cpu_type;
  //   unsigned int __cpu_subtype;
  //   unsigned int __cpu_features[1];
  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
                                          llvm::ArrayType::get(Int32Ty, 1));
  // Grab the global __cpu_model.
  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
  // Grab the first (0th) element of the __cpu_features array field of the
  // global __cpu_model struct.
  Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
                   ConstantInt::get(Int32Ty, 0)};
  Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
  Value *Features =
      Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
  // Check the value of the bit corresponding to the feature requested.
  Value *Bitset = Builder.CreateAnd(
      Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
  return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
}
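// Illustrative example (assumption): __builtin_cpu_supports("sse4.2") sets a
// single bit in FeaturesMask, and the emitted IR loads __cpu_features[0],
// ANDs it with that mask, and compares the result against zero.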
Value *CodeGenFunction::EmitX86CpuInit() {
  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
                                                    /*Variadic*/ false);
  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
  return Builder.CreateCall(Func);
}
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == X86::BI__builtin_cpu_is)
    return EmitX86CpuIs(E);
  if (BuiltinID == X86::BI__builtin_cpu_supports)
    return EmitX86CpuSupports(E);
  if (BuiltinID == X86::BI__builtin_cpu_init)
    return EmitX86CpuInit();
  SmallVector<Value*, 4> Ops;
  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }
    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }
  // These exist so that the builtin that takes an immediate can be bounds
  // checked by clang to avoid passing bad immediates to the backend. Since
  // AVX has a larger immediate than SSE we would need separate builtins to
  // do the different bounds checking. Rather than create a clang-specific
  // SSE-only builtin, this implements eight separate builtins to match the
  // gcc implementation.
  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops);
  };
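  // Illustrative example (assumption): a cmpeq-style SSE comparison builtin
  // would reach getCmpIntrinsicCall with Imm == 0; the predicate is appended
  // as an i8 immediate operand of the target intrinsic call.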
  // For the vector forms of FP comparisons, translate the builtins directly to
  // IR.
  // TODO: The builtins could be removed if the SSE header files used vector
  // extension comparisons directly (vector ordered/unordered may need
  // additional support via __builtin_isnan()).
  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
    Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
    llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
    llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
    Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
    return Builder.CreateBitCast(Sext, FPVecTy);
  };
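  // Sketch of the result (illustrative): for a 4 x float "equal" comparison
  // this produces roughly
  //   %cmp  = fcmp oeq <4 x float> %a, %b
  //   %sext = sext <4 x i1> %cmp to <4 x i32>
  //   %res  = bitcast <4 x i32> %sext to <4 x float>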
  7926. switch (BuiltinID) {
  7927. default: return nullptr;
  7928. case X86::BI_mm_prefetch: {
  7929. Value *Address = Ops[0];
  7930. ConstantInt *C = cast<ConstantInt>(Ops[1]);
  7931. Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
  7932. Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
  7933. Value *Data = ConstantInt::get(Int32Ty, 1);
  7934. Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
  7935. return Builder.CreateCall(F, {Address, RW, Locality, Data});
  7936. }
  7937. case X86::BI_mm_clflush: {
  7938. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
  7939. Ops[0]);
  7940. }
  7941. case X86::BI_mm_lfence: {
  7942. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
  7943. }
  7944. case X86::BI_mm_mfence: {
  7945. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
  7946. }
  7947. case X86::BI_mm_sfence: {
  7948. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
  7949. }
  7950. case X86::BI_mm_pause: {
  7951. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
  7952. }
  7953. case X86::BI__rdtsc: {
  7954. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
  7955. }
  7956. case X86::BI__builtin_ia32_undef128:
  7957. case X86::BI__builtin_ia32_undef256:
  7958. case X86::BI__builtin_ia32_undef512:
  7959. // The x86 definition of "undef" is not the same as the LLVM definition
  7960. // (PR32176). We leave optimizing away an unnecessary zero constant to the
  7961. // IR optimizer and backend.
  7962. // TODO: If we had a "freeze" IR instruction to generate a fixed undef
  7963. // value, we should use that here instead of a zero.
  7964. return llvm::Constant::getNullValue(ConvertType(E->getType()));
  7965. case X86::BI__builtin_ia32_vec_init_v8qi:
  7966. case X86::BI__builtin_ia32_vec_init_v4hi:
  7967. case X86::BI__builtin_ia32_vec_init_v2si:
  7968. return Builder.CreateBitCast(BuildVector(Ops),
  7969. llvm::Type::getX86_MMXTy(getLLVMContext()));
  7970. case X86::BI__builtin_ia32_vec_ext_v2si:
  7971. return Builder.CreateExtractElement(Ops[0],
  7972. llvm::ConstantInt::get(Ops[1]->getType(), 0));
  7973. case X86::BI_mm_setcsr:
  7974. case X86::BI__builtin_ia32_ldmxcsr: {
  7975. Address Tmp = CreateMemTemp(E->getArg(0)->getType());
  7976. Builder.CreateStore(Ops[0], Tmp);
  7977. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
  7978. Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
  7979. }
  7980. case X86::BI_mm_getcsr:
  7981. case X86::BI__builtin_ia32_stmxcsr: {
  7982. Address Tmp = CreateMemTemp(E->getType());
  7983. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
  7984. Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
  7985. return Builder.CreateLoad(Tmp, "stmxcsr");
  7986. }
  7987. case X86::BI__builtin_ia32_xsave:
  7988. case X86::BI__builtin_ia32_xsave64:
  7989. case X86::BI__builtin_ia32_xrstor:
  7990. case X86::BI__builtin_ia32_xrstor64:
  7991. case X86::BI__builtin_ia32_xsaveopt:
  7992. case X86::BI__builtin_ia32_xsaveopt64:
  7993. case X86::BI__builtin_ia32_xrstors:
  7994. case X86::BI__builtin_ia32_xrstors64:
  7995. case X86::BI__builtin_ia32_xsavec:
  7996. case X86::BI__builtin_ia32_xsavec64:
  7997. case X86::BI__builtin_ia32_xsaves:
  7998. case X86::BI__builtin_ia32_xsaves64: {
  7999. Intrinsic::ID ID;
  8000. #define INTRINSIC_X86_XSAVE_ID(NAME) \
  8001. case X86::BI__builtin_ia32_##NAME: \
  8002. ID = Intrinsic::x86_##NAME; \
  8003. break
  8004. switch (BuiltinID) {
  8005. default: llvm_unreachable("Unsupported intrinsic!");
  8006. INTRINSIC_X86_XSAVE_ID(xsave);
  8007. INTRINSIC_X86_XSAVE_ID(xsave64);
  8008. INTRINSIC_X86_XSAVE_ID(xrstor);
  8009. INTRINSIC_X86_XSAVE_ID(xrstor64);
  8010. INTRINSIC_X86_XSAVE_ID(xsaveopt);
  8011. INTRINSIC_X86_XSAVE_ID(xsaveopt64);
  8012. INTRINSIC_X86_XSAVE_ID(xrstors);
  8013. INTRINSIC_X86_XSAVE_ID(xrstors64);
  8014. INTRINSIC_X86_XSAVE_ID(xsavec);
  8015. INTRINSIC_X86_XSAVE_ID(xsavec64);
  8016. INTRINSIC_X86_XSAVE_ID(xsaves);
  8017. INTRINSIC_X86_XSAVE_ID(xsaves64);
  8018. }
  8019. #undef INTRINSIC_X86_XSAVE_ID
  8020. Value *Mhi = Builder.CreateTrunc(
  8021. Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
  8022. Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
  8023. Ops[1] = Mhi;
  8024. Ops.push_back(Mlo);
  8025. return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  8026. }
  8027. case X86::BI__builtin_ia32_storedqudi128_mask:
  8028. case X86::BI__builtin_ia32_storedqusi128_mask:
  8029. case X86::BI__builtin_ia32_storedquhi128_mask:
  8030. case X86::BI__builtin_ia32_storedquqi128_mask:
  8031. case X86::BI__builtin_ia32_storeupd128_mask:
  8032. case X86::BI__builtin_ia32_storeups128_mask:
  8033. case X86::BI__builtin_ia32_storedqudi256_mask:
  8034. case X86::BI__builtin_ia32_storedqusi256_mask:
  8035. case X86::BI__builtin_ia32_storedquhi256_mask:
  8036. case X86::BI__builtin_ia32_storedquqi256_mask:
  8037. case X86::BI__builtin_ia32_storeupd256_mask:
  8038. case X86::BI__builtin_ia32_storeups256_mask:
  8039. case X86::BI__builtin_ia32_storedqudi512_mask:
  8040. case X86::BI__builtin_ia32_storedqusi512_mask:
  8041. case X86::BI__builtin_ia32_storedquhi512_mask:
  8042. case X86::BI__builtin_ia32_storedquqi512_mask:
  8043. case X86::BI__builtin_ia32_storeupd512_mask:
  8044. case X86::BI__builtin_ia32_storeups512_mask:
  8045. return EmitX86MaskedStore(*this, Ops, 1);
  8046. case X86::BI__builtin_ia32_storess128_mask:
  8047. case X86::BI__builtin_ia32_storesd128_mask: {
  8048. return EmitX86MaskedStore(*this, Ops, 16);
  8049. }
  8050. case X86::BI__builtin_ia32_vpopcntb_128:
  8051. case X86::BI__builtin_ia32_vpopcntd_128:
  8052. case X86::BI__builtin_ia32_vpopcntq_128:
  8053. case X86::BI__builtin_ia32_vpopcntw_128:
  8054. case X86::BI__builtin_ia32_vpopcntb_256:
  8055. case X86::BI__builtin_ia32_vpopcntd_256:
  8056. case X86::BI__builtin_ia32_vpopcntq_256:
  8057. case X86::BI__builtin_ia32_vpopcntw_256:
  8058. case X86::BI__builtin_ia32_vpopcntb_512:
  8059. case X86::BI__builtin_ia32_vpopcntd_512:
  8060. case X86::BI__builtin_ia32_vpopcntq_512:
  8061. case X86::BI__builtin_ia32_vpopcntw_512: {
  8062. llvm::Type *ResultType = ConvertType(E->getType());
  8063. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
  8064. return Builder.CreateCall(F, Ops);
  8065. }
  8066. case X86::BI__builtin_ia32_cvtmask2b128:
  8067. case X86::BI__builtin_ia32_cvtmask2b256:
  8068. case X86::BI__builtin_ia32_cvtmask2b512:
  8069. case X86::BI__builtin_ia32_cvtmask2w128:
  8070. case X86::BI__builtin_ia32_cvtmask2w256:
  8071. case X86::BI__builtin_ia32_cvtmask2w512:
  8072. case X86::BI__builtin_ia32_cvtmask2d128:
  8073. case X86::BI__builtin_ia32_cvtmask2d256:
  8074. case X86::BI__builtin_ia32_cvtmask2d512:
  8075. case X86::BI__builtin_ia32_cvtmask2q128:
  8076. case X86::BI__builtin_ia32_cvtmask2q256:
  8077. case X86::BI__builtin_ia32_cvtmask2q512:
  8078. return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
  8079. case X86::BI__builtin_ia32_cvtb2mask128:
  8080. case X86::BI__builtin_ia32_cvtb2mask256:
  8081. case X86::BI__builtin_ia32_cvtb2mask512:
  8082. case X86::BI__builtin_ia32_cvtw2mask128:
  8083. case X86::BI__builtin_ia32_cvtw2mask256:
  8084. case X86::BI__builtin_ia32_cvtw2mask512:
  8085. case X86::BI__builtin_ia32_cvtd2mask128:
  8086. case X86::BI__builtin_ia32_cvtd2mask256:
  8087. case X86::BI__builtin_ia32_cvtd2mask512:
  8088. case X86::BI__builtin_ia32_cvtq2mask128:
  8089. case X86::BI__builtin_ia32_cvtq2mask256:
  8090. case X86::BI__builtin_ia32_cvtq2mask512:
  8091. return EmitX86ConvertToMask(*this, Ops[0]);
  8092. case X86::BI__builtin_ia32_movdqa32store128_mask:
  8093. case X86::BI__builtin_ia32_movdqa64store128_mask:
  8094. case X86::BI__builtin_ia32_storeaps128_mask:
  8095. case X86::BI__builtin_ia32_storeapd128_mask:
  8096. case X86::BI__builtin_ia32_movdqa32store256_mask:
  8097. case X86::BI__builtin_ia32_movdqa64store256_mask:
  8098. case X86::BI__builtin_ia32_storeaps256_mask:
  8099. case X86::BI__builtin_ia32_storeapd256_mask:
  8100. case X86::BI__builtin_ia32_movdqa32store512_mask:
  8101. case X86::BI__builtin_ia32_movdqa64store512_mask:
  8102. case X86::BI__builtin_ia32_storeaps512_mask:
  8103. case X86::BI__builtin_ia32_storeapd512_mask: {
  8104. unsigned Align =
  8105. getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
  8106. return EmitX86MaskedStore(*this, Ops, Align);
  8107. }
  8108. case X86::BI__builtin_ia32_loadups128_mask:
  8109. case X86::BI__builtin_ia32_loadups256_mask:
  8110. case X86::BI__builtin_ia32_loadups512_mask:
  8111. case X86::BI__builtin_ia32_loadupd128_mask:
  8112. case X86::BI__builtin_ia32_loadupd256_mask:
  8113. case X86::BI__builtin_ia32_loadupd512_mask:
  8114. case X86::BI__builtin_ia32_loaddquqi128_mask:
  8115. case X86::BI__builtin_ia32_loaddquqi256_mask:
  8116. case X86::BI__builtin_ia32_loaddquqi512_mask:
  8117. case X86::BI__builtin_ia32_loaddquhi128_mask:
  8118. case X86::BI__builtin_ia32_loaddquhi256_mask:
  8119. case X86::BI__builtin_ia32_loaddquhi512_mask:
  8120. case X86::BI__builtin_ia32_loaddqusi128_mask:
  8121. case X86::BI__builtin_ia32_loaddqusi256_mask:
  8122. case X86::BI__builtin_ia32_loaddqusi512_mask:
  8123. case X86::BI__builtin_ia32_loaddqudi128_mask:
  8124. case X86::BI__builtin_ia32_loaddqudi256_mask:
  8125. case X86::BI__builtin_ia32_loaddqudi512_mask:
  8126. return EmitX86MaskedLoad(*this, Ops, 1);
  8127. case X86::BI__builtin_ia32_loadss128_mask:
  8128. case X86::BI__builtin_ia32_loadsd128_mask:
  8129. return EmitX86MaskedLoad(*this, Ops, 16);
  8130. case X86::BI__builtin_ia32_loadaps128_mask:
  8131. case X86::BI__builtin_ia32_loadaps256_mask:
  8132. case X86::BI__builtin_ia32_loadaps512_mask:
  8133. case X86::BI__builtin_ia32_loadapd128_mask:
  8134. case X86::BI__builtin_ia32_loadapd256_mask:
  8135. case X86::BI__builtin_ia32_loadapd512_mask:
  8136. case X86::BI__builtin_ia32_movdqa32load128_mask:
  8137. case X86::BI__builtin_ia32_movdqa32load256_mask:
  8138. case X86::BI__builtin_ia32_movdqa32load512_mask:
  8139. case X86::BI__builtin_ia32_movdqa64load128_mask:
  8140. case X86::BI__builtin_ia32_movdqa64load256_mask:
  8141. case X86::BI__builtin_ia32_movdqa64load512_mask: {
  8142. unsigned Align =
  8143. getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
  8144. return EmitX86MaskedLoad(*this, Ops, Align);
  8145. }
  8146. case X86::BI__builtin_ia32_vbroadcastf128_pd256:
  8147. case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
  8148. llvm::Type *DstTy = ConvertType(E->getType());
  8149. return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
  8150. }
  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
    // Cast val to v2i64.
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
    // Extract element 0 (storelps) or 1 (storehps).
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
    // Cast the pointer to i64* and store.
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256:
  case X86::BI__builtin_ia32_palignr512_mask: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
    assert(NumElts % 16 == 0);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if (ShiftVal >= 32)
      return llvm::Constant::getNullValue(ConvertType(E->getType()));
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    if (ShiftVal > 16) {
      ShiftVal -= 16;
      Ops[1] = Ops[0];
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
    }
    uint32_t Indices[64];
    // 256-bit palignr operates on 128-bit lanes so we need to handle that.
    for (unsigned l = 0; l != NumElts; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = ShiftVal + i;
        if (Idx >= 16)
          Idx += NumElts - 16; // End of lane, switch operand.
        Indices[l + i] = Idx + l;
      }
    }
    Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
                                               makeArrayRef(Indices, NumElts),
                                               "palignr");
    // If this isn't a masked builtin, just return the align operation.
    if (Ops.size() == 3)
      return Align;
    return EmitX86Select(*this, Ops[4], Align, Ops[3]);
  }
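  // Worked example (illustrative, not from the original source): a 128-bit
  // palignr with ShiftVal == 4 builds the shuffle indices {4..15, 16..19},
  // i.e. the upper 12 bytes of Ops[1] followed by the low 4 bytes of Ops[0].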
  case X86::BI__builtin_ia32_vperm2f128_pd256:
  case X86::BI__builtin_ia32_vperm2f128_ps256:
  case X86::BI__builtin_ia32_vperm2f128_si256:
  case X86::BI__builtin_ia32_permti256: {
    unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
    // This takes a very simple approach since there are two lanes and a
    // shuffle can have 2 inputs. So we reserve the first input for the first
    // lane and the second input for the second lane. This may result in
    // duplicate sources, but this can be dealt with in the backend.
    Value *OutOps[2];
    uint32_t Indices[8];
    for (unsigned l = 0; l != 2; ++l) {
      // Determine the source for this lane.
      if (Imm & (1 << ((l * 4) + 3)))
        OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
      else if (Imm & (1 << ((l * 4) + 1)))
        OutOps[l] = Ops[1];
      else
        OutOps[l] = Ops[0];
      for (unsigned i = 0; i != NumElts/2; ++i) {
        // Start with the ith element of the source for this lane.
        unsigned Idx = (l * NumElts) + i;
        // If bit 0 of the immediate half is set, switch to the high half of
        // the source.
        if (Imm & (1 << (l * 4)))
          Idx += NumElts/2;
        Indices[(l * (NumElts/2)) + i] = Idx;
      }
    }
    return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
                                       makeArrayRef(Indices, NumElts),
                                       "vperm");
  }
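  // Worked example (illustrative, not from the original source): vperm2f128
  // with Imm == 0x20 on <8 x float> operands selects the low half of Ops[0]
  // for lane 0 and the low half of Ops[1] for lane 1, i.e. shuffle indices
  // {0,1,2,3,8,9,10,11}.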
  8234. case X86::BI__builtin_ia32_movnti:
  8235. case X86::BI__builtin_ia32_movnti64:
  8236. case X86::BI__builtin_ia32_movntsd:
  8237. case X86::BI__builtin_ia32_movntss: {
  8238. llvm::MDNode *Node = llvm::MDNode::get(
  8239. getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
  8240. Value *Ptr = Ops[0];
  8241. Value *Src = Ops[1];
  8242. // Extract the 0'th element of the source vector.
  8243. if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
  8244. BuiltinID == X86::BI__builtin_ia32_movntss)
  8245. Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
  8246. // Convert the type of the pointer to a pointer to the stored type.
  8247. Value *BC = Builder.CreateBitCast(
  8248. Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
  8249. // Unaligned nontemporal store of the scalar value.
  8250. StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
  8251. SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
  8252. SI->setAlignment(1);
  8253. return SI;
  8254. }
  8255. case X86::BI__builtin_ia32_selectb_128:
  8256. case X86::BI__builtin_ia32_selectb_256:
  8257. case X86::BI__builtin_ia32_selectb_512:
  8258. case X86::BI__builtin_ia32_selectw_128:
  8259. case X86::BI__builtin_ia32_selectw_256:
  8260. case X86::BI__builtin_ia32_selectw_512:
  8261. case X86::BI__builtin_ia32_selectd_128:
  8262. case X86::BI__builtin_ia32_selectd_256:
  8263. case X86::BI__builtin_ia32_selectd_512:
  8264. case X86::BI__builtin_ia32_selectq_128:
  8265. case X86::BI__builtin_ia32_selectq_256:
  8266. case X86::BI__builtin_ia32_selectq_512:
  8267. case X86::BI__builtin_ia32_selectps_128:
  8268. case X86::BI__builtin_ia32_selectps_256:
  8269. case X86::BI__builtin_ia32_selectps_512:
  8270. case X86::BI__builtin_ia32_selectpd_128:
  8271. case X86::BI__builtin_ia32_selectpd_256:
  8272. case X86::BI__builtin_ia32_selectpd_512:
  8273. return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
  8274. case X86::BI__builtin_ia32_cmpb128_mask:
  8275. case X86::BI__builtin_ia32_cmpb256_mask:
  8276. case X86::BI__builtin_ia32_cmpb512_mask:
  8277. case X86::BI__builtin_ia32_cmpw128_mask:
  8278. case X86::BI__builtin_ia32_cmpw256_mask:
  8279. case X86::BI__builtin_ia32_cmpw512_mask:
  8280. case X86::BI__builtin_ia32_cmpd128_mask:
  8281. case X86::BI__builtin_ia32_cmpd256_mask:
  8282. case X86::BI__builtin_ia32_cmpd512_mask:
  8283. case X86::BI__builtin_ia32_cmpq128_mask:
  8284. case X86::BI__builtin_ia32_cmpq256_mask:
  8285. case X86::BI__builtin_ia32_cmpq512_mask: {
  8286. unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
  8287. return EmitX86MaskedCompare(*this, CC, true, Ops);
  8288. }
  8289. case X86::BI__builtin_ia32_ucmpb128_mask:
  8290. case X86::BI__builtin_ia32_ucmpb256_mask:
  8291. case X86::BI__builtin_ia32_ucmpb512_mask:
  8292. case X86::BI__builtin_ia32_ucmpw128_mask:
  8293. case X86::BI__builtin_ia32_ucmpw256_mask:
  8294. case X86::BI__builtin_ia32_ucmpw512_mask:
  8295. case X86::BI__builtin_ia32_ucmpd128_mask:
  8296. case X86::BI__builtin_ia32_ucmpd256_mask:
  8297. case X86::BI__builtin_ia32_ucmpd512_mask:
  8298. case X86::BI__builtin_ia32_ucmpq128_mask:
  8299. case X86::BI__builtin_ia32_ucmpq256_mask:
  8300. case X86::BI__builtin_ia32_ucmpq512_mask: {
  8301. unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
  8302. return EmitX86MaskedCompare(*this, CC, false, Ops);
  8303. }
  8304. case X86::BI__builtin_ia32_kortestchi:
  8305. case X86::BI__builtin_ia32_kortestzhi: {
  8306. Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
  8307. Value *C;
  8308. if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
  8309. C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
  8310. else
  8311. C = llvm::Constant::getNullValue(Builder.getInt16Ty());
  8312. Value *Cmp = Builder.CreateICmpEQ(Or, C);
  8313. return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
  8314. }
  case X86::BI__builtin_ia32_kandhi:
    return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
  case X86::BI__builtin_ia32_kandnhi:
    return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
  case X86::BI__builtin_ia32_korhi:
    return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
  case X86::BI__builtin_ia32_kxnorhi:
    return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
  case X86::BI__builtin_ia32_kxorhi:
    return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
  case X86::BI__builtin_ia32_knothi: {
    Ops[0] = getMaskVecValue(*this, Ops[0], 16);
    return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
                                 Builder.getInt16Ty());
  }
  case X86::BI__builtin_ia32_kunpckdi:
  case X86::BI__builtin_ia32_kunpcksi:
  case X86::BI__builtin_ia32_kunpckhi: {
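    // kunpck concatenates the low halves of two mask registers. Treat the
    // scalar mask operands as <N x i1> vectors, take the low half of each,
    // and glue them together with a final shuffle.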
    unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
    Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
    Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
    uint32_t Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;

    // First extract half of each vector. This gives better codegen than
    // doing it in a single shuffle.
    LHS = Builder.CreateShuffleVector(LHS, LHS,
                                      makeArrayRef(Indices, NumElts / 2));
    RHS = Builder.CreateShuffleVector(RHS, RHS,
                                      makeArrayRef(Indices, NumElts / 2));
    // Concat the vectors.
    // NOTE: Operands are swapped to match the intrinsic definition.
    Value *Res = Builder.CreateShuffleVector(RHS, LHS,
                                             makeArrayRef(Indices, NumElts));
    return Builder.CreateBitCast(Res, Ops[0]->getType());
  }
  case X86::BI__builtin_ia32_vplzcntd_128_mask:
  case X86::BI__builtin_ia32_vplzcntd_256_mask:
  case X86::BI__builtin_ia32_vplzcntd_512_mask:
  case X86::BI__builtin_ia32_vplzcntq_128_mask:
  case X86::BI__builtin_ia32_vplzcntq_256_mask:
  case X86::BI__builtin_ia32_vplzcntq_512_mask: {
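    // Lower to the generic ctlz intrinsic (a zero input is defined, hence the
    // false flag), then use the mask operand to select between the result and
    // the pass-through vector.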
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
    return EmitX86Select(*this, Ops[2],
                         Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
                         Ops[1]);
  }
  8362. case X86::BI__builtin_ia32_pabsb128:
  8363. case X86::BI__builtin_ia32_pabsw128:
  8364. case X86::BI__builtin_ia32_pabsd128:
  8365. case X86::BI__builtin_ia32_pabsb256:
  8366. case X86::BI__builtin_ia32_pabsw256:
  8367. case X86::BI__builtin_ia32_pabsd256:
  8368. case X86::BI__builtin_ia32_pabsq128_mask:
  8369. case X86::BI__builtin_ia32_pabsq256_mask:
  8370. case X86::BI__builtin_ia32_pabsb512_mask:
  8371. case X86::BI__builtin_ia32_pabsw512_mask:
  8372. case X86::BI__builtin_ia32_pabsd512_mask:
  8373. case X86::BI__builtin_ia32_pabsq512_mask:
  8374. return EmitX86Abs(*this, Ops);
  8375. case X86::BI__builtin_ia32_pmaxsb128:
  8376. case X86::BI__builtin_ia32_pmaxsw128:
  8377. case X86::BI__builtin_ia32_pmaxsd128:
  8378. case X86::BI__builtin_ia32_pmaxsq128_mask:
  8379. case X86::BI__builtin_ia32_pmaxsb256:
  8380. case X86::BI__builtin_ia32_pmaxsw256:
  8381. case X86::BI__builtin_ia32_pmaxsd256:
  8382. case X86::BI__builtin_ia32_pmaxsq256_mask:
  8383. case X86::BI__builtin_ia32_pmaxsb512_mask:
  8384. case X86::BI__builtin_ia32_pmaxsw512_mask:
  8385. case X86::BI__builtin_ia32_pmaxsd512_mask:
  8386. case X86::BI__builtin_ia32_pmaxsq512_mask:
  8387. return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
  8388. case X86::BI__builtin_ia32_pmaxub128:
  8389. case X86::BI__builtin_ia32_pmaxuw128:
  8390. case X86::BI__builtin_ia32_pmaxud128:
  8391. case X86::BI__builtin_ia32_pmaxuq128_mask:
  8392. case X86::BI__builtin_ia32_pmaxub256:
  8393. case X86::BI__builtin_ia32_pmaxuw256:
  8394. case X86::BI__builtin_ia32_pmaxud256:
  8395. case X86::BI__builtin_ia32_pmaxuq256_mask:
  8396. case X86::BI__builtin_ia32_pmaxub512_mask:
  8397. case X86::BI__builtin_ia32_pmaxuw512_mask:
  8398. case X86::BI__builtin_ia32_pmaxud512_mask:
  8399. case X86::BI__builtin_ia32_pmaxuq512_mask:
  8400. return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
  8401. case X86::BI__builtin_ia32_pminsb128:
  8402. case X86::BI__builtin_ia32_pminsw128:
  8403. case X86::BI__builtin_ia32_pminsd128:
  8404. case X86::BI__builtin_ia32_pminsq128_mask:
  8405. case X86::BI__builtin_ia32_pminsb256:
  8406. case X86::BI__builtin_ia32_pminsw256:
  8407. case X86::BI__builtin_ia32_pminsd256:
  8408. case X86::BI__builtin_ia32_pminsq256_mask:
  8409. case X86::BI__builtin_ia32_pminsb512_mask:
  8410. case X86::BI__builtin_ia32_pminsw512_mask:
  8411. case X86::BI__builtin_ia32_pminsd512_mask:
  8412. case X86::BI__builtin_ia32_pminsq512_mask:
  8413. return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
  8414. case X86::BI__builtin_ia32_pminub128:
  8415. case X86::BI__builtin_ia32_pminuw128:
  8416. case X86::BI__builtin_ia32_pminud128:
  8417. case X86::BI__builtin_ia32_pminuq128_mask:
  8418. case X86::BI__builtin_ia32_pminub256:
  8419. case X86::BI__builtin_ia32_pminuw256:
  8420. case X86::BI__builtin_ia32_pminud256:
  8421. case X86::BI__builtin_ia32_pminuq256_mask:
  8422. case X86::BI__builtin_ia32_pminub512_mask:
  8423. case X86::BI__builtin_ia32_pminuw512_mask:
  8424. case X86::BI__builtin_ia32_pminud512_mask:
  8425. case X86::BI__builtin_ia32_pminuq512_mask:
  8426. return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
  8427. case X86::BI__builtin_ia32_pmuludq128:
  8428. case X86::BI__builtin_ia32_pmuludq256:
  8429. case X86::BI__builtin_ia32_pmuludq512:
  8430. return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
  8431. case X86::BI__builtin_ia32_pmuldq128:
  8432. case X86::BI__builtin_ia32_pmuldq256:
  8433. case X86::BI__builtin_ia32_pmuldq512:
  8434. return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
  8435. // 3DNow!
  8436. case X86::BI__builtin_ia32_pswapdsf:
  8437. case X86::BI__builtin_ia32_pswapdsi: {
  8438. llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
  8439. Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
  8440. llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
  8441. return Builder.CreateCall(F, Ops, "pswapd");
  8442. }
  8443. case X86::BI__builtin_ia32_rdrand16_step:
  8444. case X86::BI__builtin_ia32_rdrand32_step:
  8445. case X86::BI__builtin_ia32_rdrand64_step:
  8446. case X86::BI__builtin_ia32_rdseed16_step:
  8447. case X86::BI__builtin_ia32_rdseed32_step:
  8448. case X86::BI__builtin_ia32_rdseed64_step: {
  8449. Intrinsic::ID ID;
  8450. switch (BuiltinID) {
  8451. default: llvm_unreachable("Unsupported intrinsic!");
  8452. case X86::BI__builtin_ia32_rdrand16_step:
  8453. ID = Intrinsic::x86_rdrand_16;
  8454. break;
  8455. case X86::BI__builtin_ia32_rdrand32_step:
  8456. ID = Intrinsic::x86_rdrand_32;
  8457. break;
  8458. case X86::BI__builtin_ia32_rdrand64_step:
  8459. ID = Intrinsic::x86_rdrand_64;
  8460. break;
  8461. case X86::BI__builtin_ia32_rdseed16_step:
  8462. ID = Intrinsic::x86_rdseed_16;
  8463. break;
  8464. case X86::BI__builtin_ia32_rdseed32_step:
  8465. ID = Intrinsic::x86_rdseed_32;
  8466. break;
  8467. case X86::BI__builtin_ia32_rdseed64_step:
  8468. ID = Intrinsic::x86_rdseed_64;
  8469. break;
    }
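    // Each rdrand/rdseed intrinsic returns a {value, success} pair: store the
    // random value through the pointer argument and hand the success flag back
    // to the caller.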
    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
                                      Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }
  8476. case X86::BI__builtin_ia32_cmpps128_mask:
  8477. case X86::BI__builtin_ia32_cmpps256_mask:
  8478. case X86::BI__builtin_ia32_cmpps512_mask:
  8479. case X86::BI__builtin_ia32_cmppd128_mask:
  8480. case X86::BI__builtin_ia32_cmppd256_mask:
  case X86::BI__builtin_ia32_cmppd512_mask: {
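    // These builtins carry a mask argument that the underlying compare
    // intrinsics do not take: pull it out of the operand list here and apply
    // it to the comparison result afterwards.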
    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
    Value *MaskIn = Ops[3];
    Ops.erase(&Ops[3]);
  8485. Intrinsic::ID ID;
  8486. switch (BuiltinID) {
  8487. default: llvm_unreachable("Unsupported intrinsic!");
  8488. case X86::BI__builtin_ia32_cmpps128_mask:
  8489. ID = Intrinsic::x86_avx512_mask_cmp_ps_128;
  8490. break;
  8491. case X86::BI__builtin_ia32_cmpps256_mask:
  8492. ID = Intrinsic::x86_avx512_mask_cmp_ps_256;
  8493. break;
  8494. case X86::BI__builtin_ia32_cmpps512_mask:
  8495. ID = Intrinsic::x86_avx512_mask_cmp_ps_512;
  8496. break;
  8497. case X86::BI__builtin_ia32_cmppd128_mask:
  8498. ID = Intrinsic::x86_avx512_mask_cmp_pd_128;
  8499. break;
  8500. case X86::BI__builtin_ia32_cmppd256_mask:
  8501. ID = Intrinsic::x86_avx512_mask_cmp_pd_256;
  8502. break;
  8503. case X86::BI__builtin_ia32_cmppd512_mask:
  8504. ID = Intrinsic::x86_avx512_mask_cmp_pd_512;
  8505. break;
  8506. }
  8507. Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  8508. return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
  8509. }
  8510. // SSE packed comparison intrinsics
  8511. case X86::BI__builtin_ia32_cmpeqps:
  8512. case X86::BI__builtin_ia32_cmpeqpd:
  8513. return getVectorFCmpIR(CmpInst::FCMP_OEQ);
  8514. case X86::BI__builtin_ia32_cmpltps:
  8515. case X86::BI__builtin_ia32_cmpltpd:
  8516. return getVectorFCmpIR(CmpInst::FCMP_OLT);
  8517. case X86::BI__builtin_ia32_cmpleps:
  8518. case X86::BI__builtin_ia32_cmplepd:
  8519. return getVectorFCmpIR(CmpInst::FCMP_OLE);
  8520. case X86::BI__builtin_ia32_cmpunordps:
  8521. case X86::BI__builtin_ia32_cmpunordpd:
  8522. return getVectorFCmpIR(CmpInst::FCMP_UNO);
  8523. case X86::BI__builtin_ia32_cmpneqps:
  8524. case X86::BI__builtin_ia32_cmpneqpd:
  8525. return getVectorFCmpIR(CmpInst::FCMP_UNE);
  8526. case X86::BI__builtin_ia32_cmpnltps:
  8527. case X86::BI__builtin_ia32_cmpnltpd:
  8528. return getVectorFCmpIR(CmpInst::FCMP_UGE);
  8529. case X86::BI__builtin_ia32_cmpnleps:
  8530. case X86::BI__builtin_ia32_cmpnlepd:
  8531. return getVectorFCmpIR(CmpInst::FCMP_UGT);
  8532. case X86::BI__builtin_ia32_cmpordps:
  8533. case X86::BI__builtin_ia32_cmpordpd:
  8534. return getVectorFCmpIR(CmpInst::FCMP_ORD);
  8535. case X86::BI__builtin_ia32_cmpps:
  8536. case X86::BI__builtin_ia32_cmpps256:
  8537. case X86::BI__builtin_ia32_cmppd:
  8538. case X86::BI__builtin_ia32_cmppd256: {
  8539. unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
    // If this is one of the SSE immediates, we can use native IR.
  8541. if (CC < 8) {
  8542. FCmpInst::Predicate Pred;
  8543. switch (CC) {
  8544. case 0: Pred = FCmpInst::FCMP_OEQ; break;
  8545. case 1: Pred = FCmpInst::FCMP_OLT; break;
  8546. case 2: Pred = FCmpInst::FCMP_OLE; break;
  8547. case 3: Pred = FCmpInst::FCMP_UNO; break;
  8548. case 4: Pred = FCmpInst::FCMP_UNE; break;
  8549. case 5: Pred = FCmpInst::FCMP_UGE; break;
  8550. case 6: Pred = FCmpInst::FCMP_UGT; break;
  8551. case 7: Pred = FCmpInst::FCMP_ORD; break;
  8552. }
  8553. return getVectorFCmpIR(Pred);
  8554. }
    // We can't handle 8-31 immediates with native IR, so use the intrinsic,
    // except for the predicates that always produce a constant result.
  8557. Intrinsic::ID ID;
  8558. switch (BuiltinID) {
  8559. default: llvm_unreachable("Unsupported intrinsic!");
  8560. case X86::BI__builtin_ia32_cmpps:
  8561. ID = Intrinsic::x86_sse_cmp_ps;
  8562. break;
  8563. case X86::BI__builtin_ia32_cmpps256:
      // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones vector on any input,
      // and _CMP_FALSE_OQ and _CMP_FALSE_OS produce an all-zeros vector.
  8566. if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
  8567. Value *Constant = (CC == 0xf || CC == 0x1f) ?
  8568. llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
  8569. llvm::Constant::getNullValue(Builder.getInt32Ty());
  8570. Value *Vec = Builder.CreateVectorSplat(
  8571. Ops[0]->getType()->getVectorNumElements(), Constant);
  8572. return Builder.CreateBitCast(Vec, Ops[0]->getType());
  8573. }
  8574. ID = Intrinsic::x86_avx_cmp_ps_256;
  8575. break;
  8576. case X86::BI__builtin_ia32_cmppd:
  8577. ID = Intrinsic::x86_sse2_cmp_pd;
  8578. break;
  8579. case X86::BI__builtin_ia32_cmppd256:
      // _CMP_TRUE_UQ and _CMP_TRUE_US produce an all-ones vector on any input,
      // and _CMP_FALSE_OQ and _CMP_FALSE_OS produce an all-zeros vector.
  8582. if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
  8583. Value *Constant = (CC == 0xf || CC == 0x1f) ?
  8584. llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
  8585. llvm::Constant::getNullValue(Builder.getInt64Ty());
  8586. Value *Vec = Builder.CreateVectorSplat(
  8587. Ops[0]->getType()->getVectorNumElements(), Constant);
  8588. return Builder.CreateBitCast(Vec, Ops[0]->getType());
  8589. }
  8590. ID = Intrinsic::x86_avx_cmp_pd_256;
  8591. break;
  8592. }
  8593. return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  8594. }
  8595. // SSE scalar comparison intrinsics
  8596. case X86::BI__builtin_ia32_cmpeqss:
  8597. return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
  8598. case X86::BI__builtin_ia32_cmpltss:
  8599. return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
  8600. case X86::BI__builtin_ia32_cmpless:
  8601. return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
  8602. case X86::BI__builtin_ia32_cmpunordss:
  8603. return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
  8604. case X86::BI__builtin_ia32_cmpneqss:
  8605. return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
  8606. case X86::BI__builtin_ia32_cmpnltss:
  8607. return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
  8608. case X86::BI__builtin_ia32_cmpnless:
  8609. return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
  8610. case X86::BI__builtin_ia32_cmpordss:
  8611. return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
  8612. case X86::BI__builtin_ia32_cmpeqsd:
  8613. return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
  8614. case X86::BI__builtin_ia32_cmpltsd:
  8615. return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
  8616. case X86::BI__builtin_ia32_cmplesd:
  8617. return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
  8618. case X86::BI__builtin_ia32_cmpunordsd:
  8619. return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
  8620. case X86::BI__builtin_ia32_cmpneqsd:
  8621. return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
  8622. case X86::BI__builtin_ia32_cmpnltsd:
  8623. return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
  8624. case X86::BI__builtin_ia32_cmpnlesd:
  8625. return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
  8626. case X86::BI__builtin_ia32_cmpordsd:
  8627. return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
  case X86::BI__emul:
  case X86::BI__emulu: {
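    // __emul/__emulu multiply two 32-bit operands into a full 64-bit product;
    // widen the operands first (sign- or zero-extended as appropriate) so the
    // multiply cannot overflow.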
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
    bool isSigned = (BuiltinID == X86::BI__emul);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
  }
  case X86::BI__mulh:
  case X86::BI__umulh:
  case X86::BI_mul128:
  case X86::BI_umul128: {
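    // Widen both operands to 128 bits, multiply, and shift to recover the
    // upper 64 bits. __mulh/__umulh return only the high half; _mul128/_umul128
    // also store the high half through the out parameter and return the low
    // half of the product.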
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
      return HigherBits;

    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
    Builder.CreateStore(HigherBits, HighBitsAddress);
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
  }
  8660. case X86::BI__faststorefence: {
  8661. return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
  8662. llvm::SyncScope::System);
  8663. }
  case X86::BI_ReadWriteBarrier:
  case X86::BI_ReadBarrier:
  case X86::BI_WriteBarrier: {
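    // The MSVC barrier intrinsics only need to order memory operations on the
    // current thread, so a single-thread fence is enough; on x86 this acts as
    // a compiler barrier and typically emits no fence instruction.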
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::SingleThread);
  }
  8670. case X86::BI_BitScanForward:
  8671. case X86::BI_BitScanForward64:
  8672. return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
  8673. case X86::BI_BitScanReverse:
  8674. case X86::BI_BitScanReverse64:
  8675. return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
  8676. case X86::BI_InterlockedAnd64:
  8677. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
  8678. case X86::BI_InterlockedExchange64:
  8679. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
  8680. case X86::BI_InterlockedExchangeAdd64:
  8681. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
  8682. case X86::BI_InterlockedExchangeSub64:
  8683. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
  8684. case X86::BI_InterlockedOr64:
  8685. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
  8686. case X86::BI_InterlockedXor64:
  8687. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
  8688. case X86::BI_InterlockedDecrement64:
  8689. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
  8690. case X86::BI_InterlockedIncrement64:
  8691. return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
  case X86::BI_InterlockedCompareExchange128: {
    // InterlockedCompareExchange128 doesn't operate on 128-bit ints directly;
    // instead it takes pointers to 64-bit ints for Destination and
    // ComparandResult, and the exchange value is passed as two 64-bit ints
    // (high & low halves). The previous value is written back to
    // ComparandResult, and the success flag is returned.
  8698. llvm::Type *Int128Ty = Builder.getInt128Ty();
  8699. llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
  8700. Value *Destination =
  8701. Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy);
  8702. Value *ExchangeHigh128 =
  8703. Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty);
  8704. Value *ExchangeLow128 =
  8705. Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty);
  8706. Address ComparandResult(
  8707. Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy),
  8708. getContext().toCharUnitsFromBits(128));
  8709. Value *Exchange = Builder.CreateOr(
  8710. Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
  8711. ExchangeLow128);
  8712. Value *Comparand = Builder.CreateLoad(ComparandResult);
  8713. AtomicCmpXchgInst *CXI =
  8714. Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
  8715. AtomicOrdering::SequentiallyConsistent,
  8716. AtomicOrdering::SequentiallyConsistent);
  8717. CXI->setVolatile(true);
  8718. // Write the result back to the inout pointer.
  8719. Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
  8720. // Get the success boolean and zero extend it to i8.
  8721. Value *Success = Builder.CreateExtractValue(CXI, 1);
  8722. return Builder.CreateZExt(Success, ConvertType(E->getType()));
  8723. }
  8724. case X86::BI_AddressOfReturnAddress: {
  8725. Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
  8726. return Builder.CreateCall(F);
  8727. }
  8728. case X86::BI__stosb: {
    // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
    // instruction, but it will create a memset that won't be optimized away.
  8731. return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
  8732. }
  8733. case X86::BI__ud2:
  8734. // llvm.trap makes a ud2a instruction on x86.
  8735. return EmitTrapCall(Intrinsic::trap);
  8736. case X86::BI__int2c: {
  8737. // This syscall signals a driver assertion failure in x86 NT kernels.
  8738. llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
  8739. llvm::InlineAsm *IA =
  8740. llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
  8741. llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
  8742. getLLVMContext(), llvm::AttributeList::FunctionIndex,
  8743. llvm::Attribute::NoReturn);
  8744. CallSite CS = Builder.CreateCall(IA);
  8745. CS.setAttributes(NoReturnAttr);
  8746. return CS.getInstruction();
  8747. }
  case X86::BI__readfsbyte:
  case X86::BI__readfsword:
  case X86::BI__readfsdword:
  case X86::BI__readfsqword: {
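    // On x86, address space 257 models the %fs segment, so a load through a
    // pointer in that address space becomes an fs-relative access. The load is
    // volatile so it cannot be folded or reordered away.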
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
                                        llvm::PointerType::get(IntTy, 257));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  case X86::BI__readgsbyte:
  case X86::BI__readgsword:
  case X86::BI__readgsdword:
  case X86::BI__readgsqword: {
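    // Same as the __readfs* builtins above, but address space 256 models the
    // %gs segment.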
    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
                                        llvm::PointerType::get(IntTy, 256));
    LoadInst *Load = Builder.CreateAlignedLoad(
        IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
    Load->setVolatile(true);
    return Load;
  }
  }
}

Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  8781. default: return nullptr;
  8782. // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
  8783. // call __builtin_readcyclecounter.
  8784. case PPC::BI__builtin_ppc_get_timebase:
  8785. return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
  8786. // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
  8787. case PPC::BI__builtin_altivec_lvx:
  8788. case PPC::BI__builtin_altivec_lvxl:
  8789. case PPC::BI__builtin_altivec_lvebx:
  8790. case PPC::BI__builtin_altivec_lvehx:
  8791. case PPC::BI__builtin_altivec_lvewx:
  8792. case PPC::BI__builtin_altivec_lvsl:
  8793. case PPC::BI__builtin_altivec_lvsr:
  8794. case PPC::BI__builtin_vsx_lxvd2x:
  8795. case PPC::BI__builtin_vsx_lxvw4x:
  8796. case PPC::BI__builtin_vsx_lxvd2x_be:
  8797. case PPC::BI__builtin_vsx_lxvw4x_be:
  8798. case PPC::BI__builtin_vsx_lxvl:
  8799. case PPC::BI__builtin_vsx_lxvll:
  {
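    // Most of these builtins take (offset, base) operands and load from
    // base + offset, so form the byte address with a GEP and drop the extra
    // operand. lxvl/lxvll already take the pointer directly and keep their
    // length operand.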
    if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
        BuiltinID == PPC::BI__builtin_vsx_lxvll) {
      Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
    } else {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
      Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
      Ops.pop_back();
    }
  8809. switch (BuiltinID) {
  8810. default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
  8811. case PPC::BI__builtin_altivec_lvx:
  8812. ID = Intrinsic::ppc_altivec_lvx;
  8813. break;
  8814. case PPC::BI__builtin_altivec_lvxl:
  8815. ID = Intrinsic::ppc_altivec_lvxl;
  8816. break;
  8817. case PPC::BI__builtin_altivec_lvebx:
  8818. ID = Intrinsic::ppc_altivec_lvebx;
  8819. break;
  8820. case PPC::BI__builtin_altivec_lvehx:
  8821. ID = Intrinsic::ppc_altivec_lvehx;
  8822. break;
  8823. case PPC::BI__builtin_altivec_lvewx:
  8824. ID = Intrinsic::ppc_altivec_lvewx;
  8825. break;
  8826. case PPC::BI__builtin_altivec_lvsl:
  8827. ID = Intrinsic::ppc_altivec_lvsl;
  8828. break;
  8829. case PPC::BI__builtin_altivec_lvsr:
  8830. ID = Intrinsic::ppc_altivec_lvsr;
  8831. break;
  8832. case PPC::BI__builtin_vsx_lxvd2x:
  8833. ID = Intrinsic::ppc_vsx_lxvd2x;
  8834. break;
  8835. case PPC::BI__builtin_vsx_lxvw4x:
  8836. ID = Intrinsic::ppc_vsx_lxvw4x;
  8837. break;
  8838. case PPC::BI__builtin_vsx_lxvd2x_be:
  8839. ID = Intrinsic::ppc_vsx_lxvd2x_be;
  8840. break;
  8841. case PPC::BI__builtin_vsx_lxvw4x_be:
  8842. ID = Intrinsic::ppc_vsx_lxvw4x_be;
  8843. break;
  8844. case PPC::BI__builtin_vsx_lxvl:
  8845. ID = Intrinsic::ppc_vsx_lxvl;
  8846. break;
  8847. case PPC::BI__builtin_vsx_lxvll:
  8848. ID = Intrinsic::ppc_vsx_lxvll;
  8849. break;
  8850. }
  8851. llvm::Function *F = CGM.getIntrinsic(ID);
  8852. return Builder.CreateCall(F, Ops, "");
  8853. }
  8854. // vec_st, vec_xst_be
  8855. case PPC::BI__builtin_altivec_stvx:
  8856. case PPC::BI__builtin_altivec_stvxl:
  8857. case PPC::BI__builtin_altivec_stvebx:
  8858. case PPC::BI__builtin_altivec_stvehx:
  8859. case PPC::BI__builtin_altivec_stvewx:
  8860. case PPC::BI__builtin_vsx_stxvd2x:
  8861. case PPC::BI__builtin_vsx_stxvw4x:
  8862. case PPC::BI__builtin_vsx_stxvd2x_be:
  8863. case PPC::BI__builtin_vsx_stxvw4x_be:
  8864. case PPC::BI__builtin_vsx_stxvl:
  8865. case PPC::BI__builtin_vsx_stxvll:
  {
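    // Mirror of the load case above: stxvl/stxvll take the pointer directly,
    // while the remaining builtins form base + offset and drop the extra
    // operand.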
    if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
        BuiltinID == PPC::BI__builtin_vsx_stxvll) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
    } else {
      Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
      Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
      Ops.pop_back();
    }
  8875. switch (BuiltinID) {
  8876. default: llvm_unreachable("Unsupported st intrinsic!");
  8877. case PPC::BI__builtin_altivec_stvx:
  8878. ID = Intrinsic::ppc_altivec_stvx;
  8879. break;
  8880. case PPC::BI__builtin_altivec_stvxl:
  8881. ID = Intrinsic::ppc_altivec_stvxl;
  8882. break;
  8883. case PPC::BI__builtin_altivec_stvebx:
  8884. ID = Intrinsic::ppc_altivec_stvebx;
  8885. break;
  8886. case PPC::BI__builtin_altivec_stvehx:
  8887. ID = Intrinsic::ppc_altivec_stvehx;
  8888. break;
  8889. case PPC::BI__builtin_altivec_stvewx:
  8890. ID = Intrinsic::ppc_altivec_stvewx;
  8891. break;
  8892. case PPC::BI__builtin_vsx_stxvd2x:
  8893. ID = Intrinsic::ppc_vsx_stxvd2x;
  8894. break;
  8895. case PPC::BI__builtin_vsx_stxvw4x:
  8896. ID = Intrinsic::ppc_vsx_stxvw4x;
  8897. break;
  8898. case PPC::BI__builtin_vsx_stxvd2x_be:
  8899. ID = Intrinsic::ppc_vsx_stxvd2x_be;
  8900. break;
  8901. case PPC::BI__builtin_vsx_stxvw4x_be:
  8902. ID = Intrinsic::ppc_vsx_stxvw4x_be;
  8903. break;
  8904. case PPC::BI__builtin_vsx_stxvl:
  8905. ID = Intrinsic::ppc_vsx_stxvl;
  8906. break;
  8907. case PPC::BI__builtin_vsx_stxvll:
  8908. ID = Intrinsic::ppc_vsx_stxvll;
  8909. break;
  8910. }
  8911. llvm::Function *F = CGM.getIntrinsic(ID);
  8912. return Builder.CreateCall(F, Ops, "");
  8913. }
  8914. // Square root
  8915. case PPC::BI__builtin_vsx_xvsqrtsp:
  8916. case PPC::BI__builtin_vsx_xvsqrtdp: {
  8917. llvm::Type *ResultType = ConvertType(E->getType());
  8918. Value *X = EmitScalarExpr(E->getArg(0));
  8919. ID = Intrinsic::sqrt;
  8920. llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
  8921. return Builder.CreateCall(F, X);
  8922. }
  8923. // Count leading zeros
  8924. case PPC::BI__builtin_altivec_vclzb:
  8925. case PPC::BI__builtin_altivec_vclzh:
  8926. case PPC::BI__builtin_altivec_vclzw:
  8927. case PPC::BI__builtin_altivec_vclzd: {
  8928. llvm::Type *ResultType = ConvertType(E->getType());
  8929. Value *X = EmitScalarExpr(E->getArg(0));
  8930. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  8931. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
  8932. return Builder.CreateCall(F, {X, Undef});
  8933. }
  8934. case PPC::BI__builtin_altivec_vctzb:
  8935. case PPC::BI__builtin_altivec_vctzh:
  8936. case PPC::BI__builtin_altivec_vctzw:
  8937. case PPC::BI__builtin_altivec_vctzd: {
  8938. llvm::Type *ResultType = ConvertType(E->getType());
  8939. Value *X = EmitScalarExpr(E->getArg(0));
  8940. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  8941. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
  8942. return Builder.CreateCall(F, {X, Undef});
  8943. }
  8944. case PPC::BI__builtin_altivec_vpopcntb:
  8945. case PPC::BI__builtin_altivec_vpopcnth:
  8946. case PPC::BI__builtin_altivec_vpopcntw:
  8947. case PPC::BI__builtin_altivec_vpopcntd: {
  8948. llvm::Type *ResultType = ConvertType(E->getType());
  8949. Value *X = EmitScalarExpr(E->getArg(0));
  8950. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
  8951. return Builder.CreateCall(F, X);
  8952. }
  8953. // Copy sign
  8954. case PPC::BI__builtin_vsx_xvcpsgnsp:
  8955. case PPC::BI__builtin_vsx_xvcpsgndp: {
  8956. llvm::Type *ResultType = ConvertType(E->getType());
  8957. Value *X = EmitScalarExpr(E->getArg(0));
  8958. Value *Y = EmitScalarExpr(E->getArg(1));
  8959. ID = Intrinsic::copysign;
  8960. llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
  8961. return Builder.CreateCall(F, {X, Y});
  8962. }
  8963. // Rounding/truncation
  8964. case PPC::BI__builtin_vsx_xvrspip:
  8965. case PPC::BI__builtin_vsx_xvrdpip:
  8966. case PPC::BI__builtin_vsx_xvrdpim:
  8967. case PPC::BI__builtin_vsx_xvrspim:
  8968. case PPC::BI__builtin_vsx_xvrdpi:
  8969. case PPC::BI__builtin_vsx_xvrspi:
  8970. case PPC::BI__builtin_vsx_xvrdpic:
  8971. case PPC::BI__builtin_vsx_xvrspic:
  8972. case PPC::BI__builtin_vsx_xvrdpiz:
  8973. case PPC::BI__builtin_vsx_xvrspiz: {
  8974. llvm::Type *ResultType = ConvertType(E->getType());
  8975. Value *X = EmitScalarExpr(E->getArg(0));
  8976. if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
  8977. BuiltinID == PPC::BI__builtin_vsx_xvrspim)
  8978. ID = Intrinsic::floor;
  8979. else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
  8980. BuiltinID == PPC::BI__builtin_vsx_xvrspi)
  8981. ID = Intrinsic::round;
  8982. else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
  8983. BuiltinID == PPC::BI__builtin_vsx_xvrspic)
  8984. ID = Intrinsic::nearbyint;
  8985. else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
  8986. BuiltinID == PPC::BI__builtin_vsx_xvrspip)
  8987. ID = Intrinsic::ceil;
  8988. else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
  8989. BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
  8990. ID = Intrinsic::trunc;
  8991. llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
  8992. return Builder.CreateCall(F, X);
  8993. }
  8994. // Absolute value
  8995. case PPC::BI__builtin_vsx_xvabsdp:
  8996. case PPC::BI__builtin_vsx_xvabssp: {
  8997. llvm::Type *ResultType = ConvertType(E->getType());
  8998. Value *X = EmitScalarExpr(E->getArg(0));
  8999. llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
  9000. return Builder.CreateCall(F, X);
  9001. }
  9002. // FMA variations
  9003. case PPC::BI__builtin_vsx_xvmaddadp:
  9004. case PPC::BI__builtin_vsx_xvmaddasp:
  9005. case PPC::BI__builtin_vsx_xvnmaddadp:
  9006. case PPC::BI__builtin_vsx_xvnmaddasp:
  9007. case PPC::BI__builtin_vsx_xvmsubadp:
  9008. case PPC::BI__builtin_vsx_xvmsubasp:
  9009. case PPC::BI__builtin_vsx_xvnmsubadp:
  9010. case PPC::BI__builtin_vsx_xvnmsubasp: {
  9011. llvm::Type *ResultType = ConvertType(E->getType());
  9012. Value *X = EmitScalarExpr(E->getArg(0));
  9013. Value *Y = EmitScalarExpr(E->getArg(1));
  9014. Value *Z = EmitScalarExpr(E->getArg(2));
  9015. Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
  9016. llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
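    // xvmadd  -> fma(X, Y, Z),  xvnmadd -> -fma(X, Y, Z),
    // xvmsub  -> fma(X, Y, -Z), xvnmsub -> -fma(X, Y, -Z);
    // the negations below are emitted as subtractions from zero.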
  9017. switch (BuiltinID) {
  9018. case PPC::BI__builtin_vsx_xvmaddadp:
  9019. case PPC::BI__builtin_vsx_xvmaddasp:
  9020. return Builder.CreateCall(F, {X, Y, Z});
  9021. case PPC::BI__builtin_vsx_xvnmaddadp:
  9022. case PPC::BI__builtin_vsx_xvnmaddasp:
  9023. return Builder.CreateFSub(Zero,
  9024. Builder.CreateCall(F, {X, Y, Z}), "sub");
  9025. case PPC::BI__builtin_vsx_xvmsubadp:
  9026. case PPC::BI__builtin_vsx_xvmsubasp:
  9027. return Builder.CreateCall(F,
  9028. {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
  9029. case PPC::BI__builtin_vsx_xvnmsubadp:
  9030. case PPC::BI__builtin_vsx_xvnmsubasp:
  9031. Value *FsubRes =
  9032. Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
  9033. return Builder.CreateFSub(Zero, FsubRes, "sub");
  9034. }
  9035. llvm_unreachable("Unknown FMA operation");
  9036. return nullptr; // Suppress no-return warning
  9037. }
  9038. case PPC::BI__builtin_vsx_insertword: {
  9039. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
    // The third argument is a compile-time constant int. It must be clamped
    // to the range [0, 12].
  9042. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
  9043. assert(ArgCI &&
  9044. "Third arg to xxinsertw intrinsic must be constant integer");
  9045. const int64_t MaxIndex = 12;
  9046. int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
    // The builtin semantics don't exactly match the xxinsertw instruction's
    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
  9049. // word from the first argument, and inserts it in the second argument. The
  9050. // instruction extracts the word from its second input register and inserts
  9051. // it into its first input register, so swap the first and second arguments.
  9052. std::swap(Ops[0], Ops[1]);
  9053. // Need to cast the second argument from a vector of unsigned int to a
  9054. // vector of long long.
  9055. Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
  9056. if (getTarget().isLittleEndian()) {
  9057. // Create a shuffle mask of (1, 0)
  9058. Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
  9059. ConstantInt::get(Int32Ty, 0)
  9060. };
  9061. Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
  9062. // Reverse the double words in the vector we will extract from.
  9063. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
  9064. Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
  9065. // Reverse the index.
  9066. Index = MaxIndex - Index;
  9067. }
  9068. // Intrinsic expects the first arg to be a vector of int.
  9069. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
  9070. Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
  9071. return Builder.CreateCall(F, Ops);
  9072. }
  9073. case PPC::BI__builtin_vsx_extractuword: {
  9074. llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
  9075. // Intrinsic expects the first argument to be a vector of doublewords.
  9076. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
  9077. // The second argument is a compile time constant int that needs to
  9078. // be clamped to the range [0, 12].
  9079. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
  9080. assert(ArgCI &&
  9081. "Second Arg to xxextractuw intrinsic must be a constant integer!");
  9082. const int64_t MaxIndex = 12;
  9083. int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
  9084. if (getTarget().isLittleEndian()) {
  9085. // Reverse the index.
  9086. Index = MaxIndex - Index;
  9087. Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
  9088. // Emit the call, then reverse the double words of the results vector.
  9089. Value *Call = Builder.CreateCall(F, Ops);
  9090. // Create a shuffle mask of (1, 0)
  9091. Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
  9092. ConstantInt::get(Int32Ty, 0)
  9093. };
  9094. Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
  9095. Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
  9096. return ShuffleCall;
  9097. } else {
  9098. Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
  9099. return Builder.CreateCall(F, Ops);
  9100. }
  9101. }
  9102. case PPC::BI__builtin_vsx_xxpermdi: {
  9103. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
  9104. assert(ArgCI && "Third arg must be constant integer!");
  9105. unsigned Index = ArgCI->getZExtValue();
  9106. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
  9107. Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
  9108. // Element zero comes from the first input vector and element one comes from
  9109. // the second. The element indices within each vector are numbered in big
  9110. // endian order so the shuffle mask must be adjusted for this on little
  9111. // endian platforms (i.e. index is complemented and source vector reversed).
  9112. unsigned ElemIdx0;
  9113. unsigned ElemIdx1;
  9114. if (getTarget().isLittleEndian()) {
  9115. ElemIdx0 = (~Index & 1) + 2;
  9116. ElemIdx1 = (~Index & 2) >> 1;
  9117. } else { // BigEndian
  9118. ElemIdx0 = (Index & 2) >> 1;
  9119. ElemIdx1 = 2 + (Index & 1);
  9120. }
  9121. Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
  9122. ConstantInt::get(Int32Ty, ElemIdx1)};
  9123. Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
  9124. Value *ShuffleCall =
  9125. Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
  9126. QualType BIRetType = E->getType();
  9127. auto RetTy = ConvertType(BIRetType);
  9128. return Builder.CreateBitCast(ShuffleCall, RetTy);
  9129. }
  9130. case PPC::BI__builtin_vsx_xxsldwi: {
  9131. ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
  9132. assert(ArgCI && "Third argument must be a compile time constant");
  9133. unsigned Index = ArgCI->getZExtValue() & 0x3;
  9134. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
  9135. Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
  9136. // Create a shuffle mask
  9137. unsigned ElemIdx0;
  9138. unsigned ElemIdx1;
  9139. unsigned ElemIdx2;
  9140. unsigned ElemIdx3;
  9141. if (getTarget().isLittleEndian()) {
  9142. // Little endian element N comes from element 8+N-Index of the
  9143. // concatenated wide vector (of course, using modulo arithmetic on
  9144. // the total number of elements).
  9145. ElemIdx0 = (8 - Index) % 8;
  9146. ElemIdx1 = (9 - Index) % 8;
  9147. ElemIdx2 = (10 - Index) % 8;
  9148. ElemIdx3 = (11 - Index) % 8;
  9149. } else {
  9150. // Big endian ElemIdx<N> = Index + N
  9151. ElemIdx0 = Index;
  9152. ElemIdx1 = Index + 1;
  9153. ElemIdx2 = Index + 2;
  9154. ElemIdx3 = Index + 3;
  9155. }
  9156. Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
  9157. ConstantInt::get(Int32Ty, ElemIdx1),
  9158. ConstantInt::get(Int32Ty, ElemIdx2),
  9159. ConstantInt::get(Int32Ty, ElemIdx3)};
  9160. Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
  9161. Value *ShuffleCall =
  9162. Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
  9163. QualType BIRetType = E->getType();
  9164. auto RetTy = ConvertType(BIRetType);
  9165. return Builder.CreateBitCast(ShuffleCall, RetTy);
  9166. }
  }
}

Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (BuiltinID) {
  9172. case AMDGPU::BI__builtin_amdgcn_div_scale:
  9173. case AMDGPU::BI__builtin_amdgcn_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.
  9176. Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
  9177. llvm::Value *X = EmitScalarExpr(E->getArg(0));
  9178. llvm::Value *Y = EmitScalarExpr(E->getArg(1));
  9179. llvm::Value *Z = EmitScalarExpr(E->getArg(2));
  9180. llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
  9181. X->getType());
  9182. llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
  9183. llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
  9184. llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
  9185. llvm::Type *RealFlagType
  9186. = FlagOutPtr.getPointer()->getType()->getPointerElementType();
  9187. llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
  9188. Builder.CreateStore(FlagExt, FlagOutPtr);
  9189. return Result;
  9190. }
  case AMDGPU::BI__builtin_amdgcn_div_fmas:
  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
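    // The last operand of llvm.amdgcn.div.fmas is an i1, so convert the
    // builtin's integer argument with a "not equal to zero" test.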
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

    llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
                                      Src0->getType());
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
  }
  9202. case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
  9203. return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
  9204. case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
  9205. llvm::SmallVector<llvm::Value *, 5> Args;
  9206. for (unsigned I = 0; I != 5; ++I)
  9207. Args.push_back(EmitScalarExpr(E->getArg(I)));
  9208. Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
  9209. Args[0]->getType());
  9210. return Builder.CreateCall(F, Args);
  9211. }
  9212. case AMDGPU::BI__builtin_amdgcn_div_fixup:
  9213. case AMDGPU::BI__builtin_amdgcn_div_fixupf:
  9214. case AMDGPU::BI__builtin_amdgcn_div_fixuph:
  9215. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
  9216. case AMDGPU::BI__builtin_amdgcn_trig_preop:
  9217. case AMDGPU::BI__builtin_amdgcn_trig_preopf:
  9218. return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
  9219. case AMDGPU::BI__builtin_amdgcn_rcp:
  9220. case AMDGPU::BI__builtin_amdgcn_rcpf:
  9221. case AMDGPU::BI__builtin_amdgcn_rcph:
  9222. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
  9223. case AMDGPU::BI__builtin_amdgcn_rsq:
  9224. case AMDGPU::BI__builtin_amdgcn_rsqf:
  9225. case AMDGPU::BI__builtin_amdgcn_rsqh:
  9226. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
  9227. case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
  9228. case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
  9229. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
  9230. case AMDGPU::BI__builtin_amdgcn_sinf:
  9231. case AMDGPU::BI__builtin_amdgcn_sinh:
  9232. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
  9233. case AMDGPU::BI__builtin_amdgcn_cosf:
  9234. case AMDGPU::BI__builtin_amdgcn_cosh:
  9235. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
  9236. case AMDGPU::BI__builtin_amdgcn_log_clampf:
  9237. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
  9238. case AMDGPU::BI__builtin_amdgcn_ldexp:
  9239. case AMDGPU::BI__builtin_amdgcn_ldexpf:
  9240. case AMDGPU::BI__builtin_amdgcn_ldexph:
  9241. return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
  9242. case AMDGPU::BI__builtin_amdgcn_frexp_mant:
  9243. case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
  9244. case AMDGPU::BI__builtin_amdgcn_frexp_manth:
  9245. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
  9246. case AMDGPU::BI__builtin_amdgcn_frexp_exp:
  9247. case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
  9248. Value *Src0 = EmitScalarExpr(E->getArg(0));
  9249. Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
  9250. { Builder.getInt32Ty(), Src0->getType() });
  9251. return Builder.CreateCall(F, Src0);
  9252. }
  9253. case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
  9254. Value *Src0 = EmitScalarExpr(E->getArg(0));
  9255. Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
  9256. { Builder.getInt16Ty(), Src0->getType() });
  9257. return Builder.CreateCall(F, Src0);
  9258. }
  9259. case AMDGPU::BI__builtin_amdgcn_fract:
  9260. case AMDGPU::BI__builtin_amdgcn_fractf:
  9261. case AMDGPU::BI__builtin_amdgcn_fracth:
  9262. return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
  9263. case AMDGPU::BI__builtin_amdgcn_lerp:
  9264. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
  9265. case AMDGPU::BI__builtin_amdgcn_uicmp:
  9266. case AMDGPU::BI__builtin_amdgcn_uicmpl:
  9267. case AMDGPU::BI__builtin_amdgcn_sicmp:
  9268. case AMDGPU::BI__builtin_amdgcn_sicmpl:
  9269. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
  9270. case AMDGPU::BI__builtin_amdgcn_fcmp:
  9271. case AMDGPU::BI__builtin_amdgcn_fcmpf:
  9272. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
  9273. case AMDGPU::BI__builtin_amdgcn_class:
  9274. case AMDGPU::BI__builtin_amdgcn_classf:
  9275. case AMDGPU::BI__builtin_amdgcn_classh:
  9276. return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
  9277. case AMDGPU::BI__builtin_amdgcn_fmed3f:
  9278. case AMDGPU::BI__builtin_amdgcn_fmed3h:
  9279. return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
  9280. case AMDGPU::BI__builtin_amdgcn_read_exec: {
  9281. CallInst *CI = cast<CallInst>(
  9282. EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
  9283. CI->setConvergent();
  9284. return CI;
  9285. }
  9286. case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
  9287. case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
  9288. StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
  9289. "exec_lo" : "exec_hi";
  9290. CallInst *CI = cast<CallInst>(
  9291. EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
  9292. CI->setConvergent();
  9293. return CI;
  9294. }
  9295. // amdgcn workitem
  9296. case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
  9297. return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
  9298. case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
  9299. return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
  9300. case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
  9301. return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
  9302. // r600 intrinsics
  9303. case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
  9304. case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
  9305. return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
  9306. case AMDGPU::BI__builtin_r600_read_tidig_x:
  9307. return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
  9308. case AMDGPU::BI__builtin_r600_read_tidig_y:
  9309. return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
  9310. case AMDGPU::BI__builtin_r600_read_tidig_z:
  9311. return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
  9312. default:
  9313. return nullptr;
  9314. }
  9315. }
  9316. /// Handle a SystemZ function in which the final argument is a pointer
  9317. /// to an int that receives the post-instruction CC value. At the LLVM level
  9318. /// this is represented as a function that returns a {result, cc} pair.
  9319. static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
  9320. unsigned IntrinsicID,
  9321. const CallExpr *E) {
  9322. unsigned NumArgs = E->getNumArgs() - 1;
  9323. SmallVector<Value *, 8> Args(NumArgs);
  9324. for (unsigned I = 0; I < NumArgs; ++I)
  9325. Args[I] = CGF.EmitScalarExpr(E->getArg(I));
  9326. Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
  9327. Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
  9328. Value *Call = CGF.Builder.CreateCall(F, Args);
  9329. Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
  9330. CGF.Builder.CreateStore(CC, CCPtr);
  9331. return CGF.Builder.CreateExtractValue(Call, 0);
  9332. }
  9333. Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
  9334. const CallExpr *E) {
  9335. switch (BuiltinID) {
  9336. case SystemZ::BI__builtin_tbegin: {
  9337. Value *TDB = EmitScalarExpr(E->getArg(0));
  9338. Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
  9339. Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
  9340. return Builder.CreateCall(F, {TDB, Control});
  9341. }
  9342. case SystemZ::BI__builtin_tbegin_nofloat: {
  9343. Value *TDB = EmitScalarExpr(E->getArg(0));
  9344. Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
  9345. Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
  9346. return Builder.CreateCall(F, {TDB, Control});
  9347. }
  9348. case SystemZ::BI__builtin_tbeginc: {
  9349. Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
  9350. Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
  9351. Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
  9352. return Builder.CreateCall(F, {TDB, Control});
  9353. }
  9354. case SystemZ::BI__builtin_tabort: {
  9355. Value *Data = EmitScalarExpr(E->getArg(0));
  9356. Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
  9357. return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  9358. }
  9359. case SystemZ::BI__builtin_non_tx_store: {
  9360. Value *Address = EmitScalarExpr(E->getArg(0));
  9361. Value *Data = EmitScalarExpr(E->getArg(1));
  9362. Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
  9363. return Builder.CreateCall(F, {Data, Address});
  9364. }
  9365. // Vector builtins. Note that most vector builtins are mapped automatically
  9366. // to target-specific LLVM intrinsics. The ones handled specially here can
  9367. // be represented via standard LLVM IR, which is preferable to enable common
  9368. // LLVM optimizations.
  9369. case SystemZ::BI__builtin_s390_vpopctb:
  9370. case SystemZ::BI__builtin_s390_vpopcth:
  9371. case SystemZ::BI__builtin_s390_vpopctf:
  9372. case SystemZ::BI__builtin_s390_vpopctg: {
  9373. llvm::Type *ResultType = ConvertType(E->getType());
  9374. Value *X = EmitScalarExpr(E->getArg(0));
  9375. Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
  9376. return Builder.CreateCall(F, X);
  9377. }
  9378. case SystemZ::BI__builtin_s390_vclzb:
  9379. case SystemZ::BI__builtin_s390_vclzh:
  9380. case SystemZ::BI__builtin_s390_vclzf:
  9381. case SystemZ::BI__builtin_s390_vclzg: {
  9382. llvm::Type *ResultType = ConvertType(E->getType());
  9383. Value *X = EmitScalarExpr(E->getArg(0));
  9384. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  9385. Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
  9386. return Builder.CreateCall(F, {X, Undef});
  9387. }
  9388. case SystemZ::BI__builtin_s390_vctzb:
  9389. case SystemZ::BI__builtin_s390_vctzh:
  9390. case SystemZ::BI__builtin_s390_vctzf:
  9391. case SystemZ::BI__builtin_s390_vctzg: {
  9392. llvm::Type *ResultType = ConvertType(E->getType());
  9393. Value *X = EmitScalarExpr(E->getArg(0));
  9394. Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
  9395. Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
  9396. return Builder.CreateCall(F, {X, Undef});
  9397. }
  9398. case SystemZ::BI__builtin_s390_vfsqsb:
  9399. case SystemZ::BI__builtin_s390_vfsqdb: {
  9400. llvm::Type *ResultType = ConvertType(E->getType());
  9401. Value *X = EmitScalarExpr(E->getArg(0));
  9402. Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
  9403. return Builder.CreateCall(F, X);
  9404. }
  9405. case SystemZ::BI__builtin_s390_vfmasb:
  9406. case SystemZ::BI__builtin_s390_vfmadb: {
  9407. llvm::Type *ResultType = ConvertType(E->getType());
  9408. Value *X = EmitScalarExpr(E->getArg(0));
  9409. Value *Y = EmitScalarExpr(E->getArg(1));
  9410. Value *Z = EmitScalarExpr(E->getArg(2));
  9411. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  9412. return Builder.CreateCall(F, {X, Y, Z});
  9413. }
  9414. case SystemZ::BI__builtin_s390_vfmssb:
  9415. case SystemZ::BI__builtin_s390_vfmsdb: {
  9416. llvm::Type *ResultType = ConvertType(E->getType());
  9417. Value *X = EmitScalarExpr(E->getArg(0));
  9418. Value *Y = EmitScalarExpr(E->getArg(1));
  9419. Value *Z = EmitScalarExpr(E->getArg(2));
  9420. Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
  9421. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  9422. return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
  9423. }
  9424. case SystemZ::BI__builtin_s390_vfnmasb:
  9425. case SystemZ::BI__builtin_s390_vfnmadb: {
  9426. llvm::Type *ResultType = ConvertType(E->getType());
  9427. Value *X = EmitScalarExpr(E->getArg(0));
  9428. Value *Y = EmitScalarExpr(E->getArg(1));
  9429. Value *Z = EmitScalarExpr(E->getArg(2));
  9430. Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
  9431. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  9432. return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
  9433. }
  9434. case SystemZ::BI__builtin_s390_vfnmssb:
  9435. case SystemZ::BI__builtin_s390_vfnmsdb: {
  9436. llvm::Type *ResultType = ConvertType(E->getType());
  9437. Value *X = EmitScalarExpr(E->getArg(0));
  9438. Value *Y = EmitScalarExpr(E->getArg(1));
  9439. Value *Z = EmitScalarExpr(E->getArg(2));
  9440. Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
  9441. Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
  9442. Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
  9443. return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
  9444. }
  9445. case SystemZ::BI__builtin_s390_vflpsb:
  9446. case SystemZ::BI__builtin_s390_vflpdb: {
  9447. llvm::Type *ResultType = ConvertType(E->getType());
  9448. Value *X = EmitScalarExpr(E->getArg(0));
  9449. Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
  9450. return Builder.CreateCall(F, X);
  9451. }
  9452. case SystemZ::BI__builtin_s390_vflnsb:
  9453. case SystemZ::BI__builtin_s390_vflndb: {
  9454. llvm::Type *ResultType = ConvertType(E->getType());
  9455. Value *X = EmitScalarExpr(E->getArg(0));
  9456. Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
  9457. Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
  9458. return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
  9459. }
  9460. case SystemZ::BI__builtin_s390_vfisb:
  9461. case SystemZ::BI__builtin_s390_vfidb: {
  9462. llvm::Type *ResultType = ConvertType(E->getType());
  9463. Value *X = EmitScalarExpr(E->getArg(0));
  9464. // Constant-fold the M4 and M5 mask arguments.
  9465. llvm::APSInt M4, M5;
  9466. bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
  9467. bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
  9468. assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
  9469. (void)IsConstM4; (void)IsConstM5;
    // Check whether this instance can be represented via an LLVM standard
    // intrinsic. We only support some combinations of M4 and M5.
  9472. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  9473. switch (M4.getZExtValue()) {
  9474. default: break;
  9475. case 0: // IEEE-inexact exception allowed
  9476. switch (M5.getZExtValue()) {
  9477. default: break;
  9478. case 0: ID = Intrinsic::rint; break;
  9479. }
  9480. break;
  9481. case 4: // IEEE-inexact exception suppressed
  9482. switch (M5.getZExtValue()) {
  9483. default: break;
  9484. case 0: ID = Intrinsic::nearbyint; break;
  9485. case 1: ID = Intrinsic::round; break;
  9486. case 5: ID = Intrinsic::trunc; break;
  9487. case 6: ID = Intrinsic::ceil; break;
  9488. case 7: ID = Intrinsic::floor; break;
  9489. }
  9490. break;
  9491. }
  9492. if (ID != Intrinsic::not_intrinsic) {
  9493. Function *F = CGM.getIntrinsic(ID, ResultType);
  9494. return Builder.CreateCall(F, X);
  9495. }
  9496. switch (BuiltinID) {
  9497. case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
  9498. case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
  9499. default: llvm_unreachable("Unknown BuiltinID");
  9500. }
  9501. Function *F = CGM.getIntrinsic(ID);
  9502. Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
  9503. Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
  9504. return Builder.CreateCall(F, {X, M4Value, M5Value});
  9505. }
  9506. case SystemZ::BI__builtin_s390_vfmaxsb:
  9507. case SystemZ::BI__builtin_s390_vfmaxdb: {
  9508. llvm::Type *ResultType = ConvertType(E->getType());
  9509. Value *X = EmitScalarExpr(E->getArg(0));
  9510. Value *Y = EmitScalarExpr(E->getArg(1));
  9511. // Constant-fold the M4 mask argument.
  9512. llvm::APSInt M4;
  9513. bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
  9514. assert(IsConstM4 && "Constant arg isn't actually constant?");
  9515. (void)IsConstM4;
    // Check whether this instance can be represented via an LLVM standard
    // intrinsic. We only support some values of M4.
  9518. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  9519. switch (M4.getZExtValue()) {
  9520. default: break;
  9521. case 4: ID = Intrinsic::maxnum; break;
  9522. }
  9523. if (ID != Intrinsic::not_intrinsic) {
  9524. Function *F = CGM.getIntrinsic(ID, ResultType);
  9525. return Builder.CreateCall(F, {X, Y});
  9526. }
  9527. switch (BuiltinID) {
  9528. case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
  9529. case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
  9530. default: llvm_unreachable("Unknown BuiltinID");
  9531. }
  9532. Function *F = CGM.getIntrinsic(ID);
  9533. Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
  9534. return Builder.CreateCall(F, {X, Y, M4Value});
  9535. }
  9536. case SystemZ::BI__builtin_s390_vfminsb:
  9537. case SystemZ::BI__builtin_s390_vfmindb: {
  9538. llvm::Type *ResultType = ConvertType(E->getType());
  9539. Value *X = EmitScalarExpr(E->getArg(0));
  9540. Value *Y = EmitScalarExpr(E->getArg(1));
  9541. // Constant-fold the M4 mask argument.
  9542. llvm::APSInt M4;
  9543. bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
  9544. assert(IsConstM4 && "Constant arg isn't actually constant?");
  9545. (void)IsConstM4;
9546. // Check whether this instance can be represented via an LLVM standard
  9547. // intrinsic. We only support some values of M4.
  9548. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  9549. switch (M4.getZExtValue()) {
  9550. default: break;
  9551. case 4: ID = Intrinsic::minnum; break;
  9552. }
  9553. if (ID != Intrinsic::not_intrinsic) {
  9554. Function *F = CGM.getIntrinsic(ID, ResultType);
  9555. return Builder.CreateCall(F, {X, Y});
  9556. }
  9557. switch (BuiltinID) {
  9558. case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
  9559. case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
  9560. default: llvm_unreachable("Unknown BuiltinID");
  9561. }
  9562. Function *F = CGM.getIntrinsic(ID);
  9563. Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
  9564. return Builder.CreateCall(F, {X, Y, M4Value});
  9565. }
9566. // Vector intrinsics that output the post-instruction CC value.
  9567. #define INTRINSIC_WITH_CC(NAME) \
  9568. case SystemZ::BI__builtin_##NAME: \
  9569. return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
  9570. INTRINSIC_WITH_CC(s390_vpkshs);
  9571. INTRINSIC_WITH_CC(s390_vpksfs);
  9572. INTRINSIC_WITH_CC(s390_vpksgs);
  9573. INTRINSIC_WITH_CC(s390_vpklshs);
  9574. INTRINSIC_WITH_CC(s390_vpklsfs);
  9575. INTRINSIC_WITH_CC(s390_vpklsgs);
  9576. INTRINSIC_WITH_CC(s390_vceqbs);
  9577. INTRINSIC_WITH_CC(s390_vceqhs);
  9578. INTRINSIC_WITH_CC(s390_vceqfs);
  9579. INTRINSIC_WITH_CC(s390_vceqgs);
  9580. INTRINSIC_WITH_CC(s390_vchbs);
  9581. INTRINSIC_WITH_CC(s390_vchhs);
  9582. INTRINSIC_WITH_CC(s390_vchfs);
  9583. INTRINSIC_WITH_CC(s390_vchgs);
  9584. INTRINSIC_WITH_CC(s390_vchlbs);
  9585. INTRINSIC_WITH_CC(s390_vchlhs);
  9586. INTRINSIC_WITH_CC(s390_vchlfs);
  9587. INTRINSIC_WITH_CC(s390_vchlgs);
  9588. INTRINSIC_WITH_CC(s390_vfaebs);
  9589. INTRINSIC_WITH_CC(s390_vfaehs);
  9590. INTRINSIC_WITH_CC(s390_vfaefs);
  9591. INTRINSIC_WITH_CC(s390_vfaezbs);
  9592. INTRINSIC_WITH_CC(s390_vfaezhs);
  9593. INTRINSIC_WITH_CC(s390_vfaezfs);
  9594. INTRINSIC_WITH_CC(s390_vfeebs);
  9595. INTRINSIC_WITH_CC(s390_vfeehs);
  9596. INTRINSIC_WITH_CC(s390_vfeefs);
  9597. INTRINSIC_WITH_CC(s390_vfeezbs);
  9598. INTRINSIC_WITH_CC(s390_vfeezhs);
  9599. INTRINSIC_WITH_CC(s390_vfeezfs);
  9600. INTRINSIC_WITH_CC(s390_vfenebs);
  9601. INTRINSIC_WITH_CC(s390_vfenehs);
  9602. INTRINSIC_WITH_CC(s390_vfenefs);
  9603. INTRINSIC_WITH_CC(s390_vfenezbs);
  9604. INTRINSIC_WITH_CC(s390_vfenezhs);
  9605. INTRINSIC_WITH_CC(s390_vfenezfs);
  9606. INTRINSIC_WITH_CC(s390_vistrbs);
  9607. INTRINSIC_WITH_CC(s390_vistrhs);
  9608. INTRINSIC_WITH_CC(s390_vistrfs);
  9609. INTRINSIC_WITH_CC(s390_vstrcbs);
  9610. INTRINSIC_WITH_CC(s390_vstrchs);
  9611. INTRINSIC_WITH_CC(s390_vstrcfs);
  9612. INTRINSIC_WITH_CC(s390_vstrczbs);
  9613. INTRINSIC_WITH_CC(s390_vstrczhs);
  9614. INTRINSIC_WITH_CC(s390_vstrczfs);
  9615. INTRINSIC_WITH_CC(s390_vfcesbs);
  9616. INTRINSIC_WITH_CC(s390_vfcedbs);
  9617. INTRINSIC_WITH_CC(s390_vfchsbs);
  9618. INTRINSIC_WITH_CC(s390_vfchdbs);
  9619. INTRINSIC_WITH_CC(s390_vfchesbs);
  9620. INTRINSIC_WITH_CC(s390_vfchedbs);
  9621. INTRINSIC_WITH_CC(s390_vftcisb);
  9622. INTRINSIC_WITH_CC(s390_vftcidb);
  9623. #undef INTRINSIC_WITH_CC
  9624. default:
  9625. return nullptr;
  9626. }
  9627. }
  9628. Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
  9629. const CallExpr *E) {
  9630. auto MakeLdg = [&](unsigned IntrinsicID) {
  9631. Value *Ptr = EmitScalarExpr(E->getArg(0));
  9632. clang::CharUnits Align =
  9633. getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
  9634. return Builder.CreateCall(
  9635. CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
  9636. Ptr->getType()}),
  9637. {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
  9638. };
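// The MakeLdg helper above is shared by all __nvvm_ldg_* cases: the ldg
// intrinsic is overloaded on the loaded element type and the pointer type,
// and the pointee's natural alignment is passed explicitly as the i32 second
// operand (e.g. 4 for a plain int load, assuming the usual 4-byte alignment
// of int).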
  9639. auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
  9640. Value *Ptr = EmitScalarExpr(E->getArg(0));
  9641. return Builder.CreateCall(
  9642. CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
  9643. Ptr->getType()}),
  9644. {Ptr, EmitScalarExpr(E->getArg(1))});
  9645. };
  9646. switch (BuiltinID) {
  9647. case NVPTX::BI__nvvm_atom_add_gen_i:
  9648. case NVPTX::BI__nvvm_atom_add_gen_l:
  9649. case NVPTX::BI__nvvm_atom_add_gen_ll:
  9650. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
  9651. case NVPTX::BI__nvvm_atom_sub_gen_i:
  9652. case NVPTX::BI__nvvm_atom_sub_gen_l:
  9653. case NVPTX::BI__nvvm_atom_sub_gen_ll:
  9654. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
  9655. case NVPTX::BI__nvvm_atom_and_gen_i:
  9656. case NVPTX::BI__nvvm_atom_and_gen_l:
  9657. case NVPTX::BI__nvvm_atom_and_gen_ll:
  9658. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
  9659. case NVPTX::BI__nvvm_atom_or_gen_i:
  9660. case NVPTX::BI__nvvm_atom_or_gen_l:
  9661. case NVPTX::BI__nvvm_atom_or_gen_ll:
  9662. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
  9663. case NVPTX::BI__nvvm_atom_xor_gen_i:
  9664. case NVPTX::BI__nvvm_atom_xor_gen_l:
  9665. case NVPTX::BI__nvvm_atom_xor_gen_ll:
  9666. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
  9667. case NVPTX::BI__nvvm_atom_xchg_gen_i:
  9668. case NVPTX::BI__nvvm_atom_xchg_gen_l:
  9669. case NVPTX::BI__nvvm_atom_xchg_gen_ll:
  9670. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
  9671. case NVPTX::BI__nvvm_atom_max_gen_i:
  9672. case NVPTX::BI__nvvm_atom_max_gen_l:
  9673. case NVPTX::BI__nvvm_atom_max_gen_ll:
  9674. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
  9675. case NVPTX::BI__nvvm_atom_max_gen_ui:
  9676. case NVPTX::BI__nvvm_atom_max_gen_ul:
  9677. case NVPTX::BI__nvvm_atom_max_gen_ull:
  9678. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
  9679. case NVPTX::BI__nvvm_atom_min_gen_i:
  9680. case NVPTX::BI__nvvm_atom_min_gen_l:
  9681. case NVPTX::BI__nvvm_atom_min_gen_ll:
  9682. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
  9683. case NVPTX::BI__nvvm_atom_min_gen_ui:
  9684. case NVPTX::BI__nvvm_atom_min_gen_ul:
  9685. case NVPTX::BI__nvvm_atom_min_gen_ull:
  9686. return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
  9687. case NVPTX::BI__nvvm_atom_cas_gen_i:
  9688. case NVPTX::BI__nvvm_atom_cas_gen_l:
  9689. case NVPTX::BI__nvvm_atom_cas_gen_ll:
  9690. // __nvvm_atom_cas_gen_* should return the old value rather than the
  9691. // success flag.
  9692. return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
  9693. case NVPTX::BI__nvvm_atom_add_gen_f: {
  9694. Value *Ptr = EmitScalarExpr(E->getArg(0));
  9695. Value *Val = EmitScalarExpr(E->getArg(1));
9696. // atomicrmw only deals with integer arguments, so we need to use
9697. // LLVM's nvvm_atomic_load_add_f32 intrinsic instead.
  9698. Value *FnALAF32 =
  9699. CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
  9700. return Builder.CreateCall(FnALAF32, {Ptr, Val});
  9701. }
  9702. case NVPTX::BI__nvvm_atom_add_gen_d: {
  9703. Value *Ptr = EmitScalarExpr(E->getArg(0));
  9704. Value *Val = EmitScalarExpr(E->getArg(1));
  9705. // atomicrmw only deals with integer arguments, so we need to use
  9706. // LLVM's nvvm_atomic_load_add_f64 intrinsic.
  9707. Value *FnALAF64 =
  9708. CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType());
  9709. return Builder.CreateCall(FnALAF64, {Ptr, Val});
  9710. }
  9711. case NVPTX::BI__nvvm_atom_inc_gen_ui: {
  9712. Value *Ptr = EmitScalarExpr(E->getArg(0));
  9713. Value *Val = EmitScalarExpr(E->getArg(1));
  9714. Value *FnALI32 =
  9715. CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
  9716. return Builder.CreateCall(FnALI32, {Ptr, Val});
  9717. }
  9718. case NVPTX::BI__nvvm_atom_dec_gen_ui: {
  9719. Value *Ptr = EmitScalarExpr(E->getArg(0));
  9720. Value *Val = EmitScalarExpr(E->getArg(1));
  9721. Value *FnALD32 =
  9722. CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
  9723. return Builder.CreateCall(FnALD32, {Ptr, Val});
  9724. }
  9725. case NVPTX::BI__nvvm_ldg_c:
  9726. case NVPTX::BI__nvvm_ldg_c2:
  9727. case NVPTX::BI__nvvm_ldg_c4:
  9728. case NVPTX::BI__nvvm_ldg_s:
  9729. case NVPTX::BI__nvvm_ldg_s2:
  9730. case NVPTX::BI__nvvm_ldg_s4:
  9731. case NVPTX::BI__nvvm_ldg_i:
  9732. case NVPTX::BI__nvvm_ldg_i2:
  9733. case NVPTX::BI__nvvm_ldg_i4:
  9734. case NVPTX::BI__nvvm_ldg_l:
  9735. case NVPTX::BI__nvvm_ldg_ll:
  9736. case NVPTX::BI__nvvm_ldg_ll2:
  9737. case NVPTX::BI__nvvm_ldg_uc:
  9738. case NVPTX::BI__nvvm_ldg_uc2:
  9739. case NVPTX::BI__nvvm_ldg_uc4:
  9740. case NVPTX::BI__nvvm_ldg_us:
  9741. case NVPTX::BI__nvvm_ldg_us2:
  9742. case NVPTX::BI__nvvm_ldg_us4:
  9743. case NVPTX::BI__nvvm_ldg_ui:
  9744. case NVPTX::BI__nvvm_ldg_ui2:
  9745. case NVPTX::BI__nvvm_ldg_ui4:
  9746. case NVPTX::BI__nvvm_ldg_ul:
  9747. case NVPTX::BI__nvvm_ldg_ull:
  9748. case NVPTX::BI__nvvm_ldg_ull2:
  9749. // PTX Interoperability section 2.2: "For a vector with an even number of
  9750. // elements, its alignment is set to number of elements times the alignment
  9751. // of its member: n*alignof(t)."
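// By that rule an int4 loaded via __nvvm_ldg_i4, for example, would be
// treated as 4 * alignof(int) = 16-byte aligned.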
  9752. return MakeLdg(Intrinsic::nvvm_ldg_global_i);
  9753. case NVPTX::BI__nvvm_ldg_f:
  9754. case NVPTX::BI__nvvm_ldg_f2:
  9755. case NVPTX::BI__nvvm_ldg_f4:
  9756. case NVPTX::BI__nvvm_ldg_d:
  9757. case NVPTX::BI__nvvm_ldg_d2:
  9758. return MakeLdg(Intrinsic::nvvm_ldg_global_f);
  9759. case NVPTX::BI__nvvm_atom_cta_add_gen_i:
  9760. case NVPTX::BI__nvvm_atom_cta_add_gen_l:
  9761. case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
  9762. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
  9763. case NVPTX::BI__nvvm_atom_sys_add_gen_i:
  9764. case NVPTX::BI__nvvm_atom_sys_add_gen_l:
  9765. case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
  9766. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
  9767. case NVPTX::BI__nvvm_atom_cta_add_gen_f:
  9768. case NVPTX::BI__nvvm_atom_cta_add_gen_d:
  9769. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
  9770. case NVPTX::BI__nvvm_atom_sys_add_gen_f:
  9771. case NVPTX::BI__nvvm_atom_sys_add_gen_d:
  9772. return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
  9773. case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
  9774. case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
  9775. case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
  9776. return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
  9777. case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
  9778. case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
  9779. case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
  9780. return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
  9781. case NVPTX::BI__nvvm_atom_cta_max_gen_i:
  9782. case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
  9783. case NVPTX::BI__nvvm_atom_cta_max_gen_l:
  9784. case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
  9785. case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
  9786. case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
  9787. return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
  9788. case NVPTX::BI__nvvm_atom_sys_max_gen_i:
  9789. case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
  9790. case NVPTX::BI__nvvm_atom_sys_max_gen_l:
  9791. case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
  9792. case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
  9793. case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
  9794. return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
  9795. case NVPTX::BI__nvvm_atom_cta_min_gen_i:
  9796. case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
  9797. case NVPTX::BI__nvvm_atom_cta_min_gen_l:
  9798. case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
  9799. case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
  9800. case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
  9801. return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
  9802. case NVPTX::BI__nvvm_atom_sys_min_gen_i:
  9803. case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
  9804. case NVPTX::BI__nvvm_atom_sys_min_gen_l:
  9805. case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
  9806. case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
  9807. case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
  9808. return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
  9809. case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
  9810. return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
  9811. case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
  9812. return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
  9813. case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
  9814. return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
  9815. case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
  9816. return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
  9817. case NVPTX::BI__nvvm_atom_cta_and_gen_i:
  9818. case NVPTX::BI__nvvm_atom_cta_and_gen_l:
  9819. case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
  9820. return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
  9821. case NVPTX::BI__nvvm_atom_sys_and_gen_i:
  9822. case NVPTX::BI__nvvm_atom_sys_and_gen_l:
  9823. case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
  9824. return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
  9825. case NVPTX::BI__nvvm_atom_cta_or_gen_i:
  9826. case NVPTX::BI__nvvm_atom_cta_or_gen_l:
  9827. case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
  9828. return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
  9829. case NVPTX::BI__nvvm_atom_sys_or_gen_i:
  9830. case NVPTX::BI__nvvm_atom_sys_or_gen_l:
  9831. case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
  9832. return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
  9833. case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
  9834. case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
  9835. case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
  9836. return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
  9837. case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
  9838. case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
  9839. case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
  9840. return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
  9841. case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
  9842. case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
  9843. case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
  9844. Value *Ptr = EmitScalarExpr(E->getArg(0));
  9845. return Builder.CreateCall(
  9846. CGM.getIntrinsic(
  9847. Intrinsic::nvvm_atomic_cas_gen_i_cta,
  9848. {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
  9849. {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  9850. }
  9851. case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
  9852. case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
  9853. case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
  9854. Value *Ptr = EmitScalarExpr(E->getArg(0));
  9855. return Builder.CreateCall(
  9856. CGM.getIntrinsic(
  9857. Intrinsic::nvvm_atomic_cas_gen_i_sys,
  9858. {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
  9859. {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
  9860. }
  9861. case NVPTX::BI__nvvm_match_all_sync_i32p:
  9862. case NVPTX::BI__nvvm_match_all_sync_i64p: {
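// At the source level these are used roughly as
//   int pred; unsigned r = __nvvm_match_all_sync_i32p(mask, val, &pred);
// The underlying intrinsic returns a {value, predicate} pair: element 0
// becomes the builtin's result and element 1 is zero-extended and stored
// through the predicate out-pointer (third argument).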
  9863. Value *Mask = EmitScalarExpr(E->getArg(0));
  9864. Value *Val = EmitScalarExpr(E->getArg(1));
  9865. Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
  9866. Value *ResultPair = Builder.CreateCall(
  9867. CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
  9868. ? Intrinsic::nvvm_match_all_sync_i32p
  9869. : Intrinsic::nvvm_match_all_sync_i64p),
  9870. {Mask, Val});
  9871. Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
  9872. PredOutPtr.getElementType());
  9873. Builder.CreateStore(Pred, PredOutPtr);
  9874. return Builder.CreateExtractValue(ResultPair, 0);
  9875. }
  9876. case NVPTX::BI__hmma_m16n16k16_ld_a:
  9877. case NVPTX::BI__hmma_m16n16k16_ld_b:
  9878. case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
  9879. case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
  9880. case NVPTX::BI__hmma_m32n8k16_ld_a:
  9881. case NVPTX::BI__hmma_m32n8k16_ld_b:
  9882. case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
  9883. case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
  9884. case NVPTX::BI__hmma_m8n32k16_ld_a:
  9885. case NVPTX::BI__hmma_m8n32k16_ld_b:
  9886. case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
  9887. case NVPTX::BI__hmma_m8n32k16_ld_c_f32: {
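// Each of these loads a WMMA fragment: the intrinsic result is an aggregate
// of NumResults 32-bit elements which are written out through the
// destination pointer; e.g. __hmma_m16n16k16_ld_c_f32 stores 8 values, each
// bitcast to the destination element type and written with 4-byte alignment.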
  9888. Address Dst = EmitPointerWithAlignment(E->getArg(0));
  9889. Value *Src = EmitScalarExpr(E->getArg(1));
  9890. Value *Ldm = EmitScalarExpr(E->getArg(2));
  9891. llvm::APSInt isColMajorArg;
  9892. if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
  9893. return nullptr;
  9894. bool isColMajor = isColMajorArg.getSExtValue();
  9895. unsigned IID;
  9896. unsigned NumResults;
  9897. switch (BuiltinID) {
  9898. case NVPTX::BI__hmma_m16n16k16_ld_a:
  9899. IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
  9900. : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
  9901. NumResults = 8;
  9902. break;
  9903. case NVPTX::BI__hmma_m16n16k16_ld_b:
  9904. IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
  9905. : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
  9906. NumResults = 8;
  9907. break;
  9908. case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
  9909. IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
  9910. : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
  9911. NumResults = 4;
  9912. break;
  9913. case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
  9914. IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
  9915. : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
  9916. NumResults = 8;
  9917. break;
  9918. case NVPTX::BI__hmma_m32n8k16_ld_a:
  9919. IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride
  9920. : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride;
  9921. NumResults = 8;
  9922. break;
  9923. case NVPTX::BI__hmma_m32n8k16_ld_b:
  9924. IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride
  9925. : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride;
  9926. NumResults = 8;
  9927. break;
  9928. case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
  9929. IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride
  9930. : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride;
  9931. NumResults = 4;
  9932. break;
  9933. case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
  9934. IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride
  9935. : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride;
  9936. NumResults = 8;
  9937. break;
  9938. case NVPTX::BI__hmma_m8n32k16_ld_a:
  9939. IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride
  9940. : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride;
  9941. NumResults = 8;
  9942. break;
  9943. case NVPTX::BI__hmma_m8n32k16_ld_b:
  9944. IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride
  9945. : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride;
  9946. NumResults = 8;
  9947. break;
  9948. case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
  9949. IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride
  9950. : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride;
  9951. NumResults = 4;
  9952. break;
  9953. case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
  9954. IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride
  9955. : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride;
  9956. NumResults = 8;
  9957. break;
  9958. default:
  9959. llvm_unreachable("Unexpected builtin ID.");
  9960. }
  9961. Value *Result =
  9962. Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
  9963. // Save returned values.
  9964. for (unsigned i = 0; i < NumResults; ++i) {
  9965. Builder.CreateAlignedStore(
  9966. Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
  9967. Dst.getElementType()),
  9968. Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
  9969. CharUnits::fromQuantity(4));
  9970. }
  9971. return Result;
  9972. }
  9973. case NVPTX::BI__hmma_m16n16k16_st_c_f16:
  9974. case NVPTX::BI__hmma_m16n16k16_st_c_f32:
  9975. case NVPTX::BI__hmma_m32n8k16_st_c_f16:
  9976. case NVPTX::BI__hmma_m32n8k16_st_c_f32:
  9977. case NVPTX::BI__hmma_m8n32k16_st_c_f16:
  9978. case NVPTX::BI__hmma_m8n32k16_st_c_f32: {
  9979. Value *Dst = EmitScalarExpr(E->getArg(0));
  9980. Address Src = EmitPointerWithAlignment(E->getArg(1));
  9981. Value *Ldm = EmitScalarExpr(E->getArg(2));
  9982. llvm::APSInt isColMajorArg;
  9983. if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
  9984. return nullptr;
  9985. bool isColMajor = isColMajorArg.getSExtValue();
  9986. unsigned IID;
  9987. unsigned NumResults = 8;
9988. // PTX instructions (and LLVM intrinsics) are defined for slice _d_, yet
  9989. // for some reason nvcc builtins use _c_.
  9990. switch (BuiltinID) {
  9991. case NVPTX::BI__hmma_m16n16k16_st_c_f16:
  9992. IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
  9993. : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
  9994. NumResults = 4;
  9995. break;
  9996. case NVPTX::BI__hmma_m16n16k16_st_c_f32:
  9997. IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
  9998. : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
  9999. break;
  10000. case NVPTX::BI__hmma_m32n8k16_st_c_f16:
  10001. IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride
  10002. : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride;
  10003. NumResults = 4;
  10004. break;
  10005. case NVPTX::BI__hmma_m32n8k16_st_c_f32:
  10006. IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride
  10007. : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride;
  10008. break;
  10009. case NVPTX::BI__hmma_m8n32k16_st_c_f16:
  10010. IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride
  10011. : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride;
  10012. NumResults = 4;
  10013. break;
  10014. case NVPTX::BI__hmma_m8n32k16_st_c_f32:
  10015. IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride
  10016. : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride;
  10017. break;
  10018. default:
  10019. llvm_unreachable("Unexpected builtin ID.");
  10020. }
  10021. Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType());
  10022. llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
  10023. SmallVector<Value *, 10> Values = {Dst};
  10024. for (unsigned i = 0; i < NumResults; ++i) {
  10025. Value *V = Builder.CreateAlignedLoad(
  10026. Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
  10027. CharUnits::fromQuantity(4));
  10028. Values.push_back(Builder.CreateBitCast(V, ParamType));
  10029. }
  10030. Values.push_back(Ldm);
  10031. Value *Result = Builder.CreateCall(Intrinsic, Values);
  10032. return Result;
  10033. }
  10034. // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
  10035. // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
  10036. case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
  10037. case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
  10038. case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
  10039. case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
  10040. case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
  10041. case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
  10042. case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
  10043. case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
  10044. case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
  10045. case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
  10046. case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
  10047. case NVPTX::BI__hmma_m8n32k16_mma_f16f32: {
  10048. Address Dst = EmitPointerWithAlignment(E->getArg(0));
  10049. Address SrcA = EmitPointerWithAlignment(E->getArg(1));
  10050. Address SrcB = EmitPointerWithAlignment(E->getArg(2));
  10051. Address SrcC = EmitPointerWithAlignment(E->getArg(3));
  10052. llvm::APSInt LayoutArg;
  10053. if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
  10054. return nullptr;
  10055. int Layout = LayoutArg.getSExtValue();
  10056. if (Layout < 0 || Layout > 3)
  10057. return nullptr;
  10058. llvm::APSInt SatfArg;
  10059. if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
  10060. return nullptr;
  10061. bool Satf = SatfArg.getSExtValue();
  10062. // clang-format off
  10063. #define MMA_VARIANTS(geom, type) {{ \
  10064. Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
  10065. Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
  10066. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
  10067. Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
  10068. Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
  10069. Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
  10070. Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \
  10071. Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \
  10072. }}
  10073. // clang-format on
  10074. auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) {
  10075. unsigned Index = Layout * 2 + Satf;
  10076. assert(Index < 8);
  10077. return Variants[Index];
  10078. };
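// E.g. with the MMA_VARIANTS ordering above, Layout == 2 and Satf == 1 give
// Index == 5, which selects the col_row ..._satfinite variant.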
  10079. unsigned IID;
  10080. unsigned NumEltsC;
  10081. unsigned NumEltsD;
  10082. switch (BuiltinID) {
  10083. case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
  10084. IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16));
  10085. NumEltsC = 4;
  10086. NumEltsD = 4;
  10087. break;
  10088. case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
  10089. IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16));
  10090. NumEltsC = 4;
  10091. NumEltsD = 8;
  10092. break;
  10093. case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
  10094. IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32));
  10095. NumEltsC = 8;
  10096. NumEltsD = 4;
  10097. break;
  10098. case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
  10099. IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32));
  10100. NumEltsC = 8;
  10101. NumEltsD = 8;
  10102. break;
  10103. case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
  10104. IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16));
  10105. NumEltsC = 4;
  10106. NumEltsD = 4;
  10107. break;
  10108. case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
  10109. IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16));
  10110. NumEltsC = 4;
  10111. NumEltsD = 8;
  10112. break;
  10113. case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
  10114. IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32));
  10115. NumEltsC = 8;
  10116. NumEltsD = 4;
  10117. break;
  10118. case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
  10119. IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32));
  10120. NumEltsC = 8;
  10121. NumEltsD = 8;
  10122. break;
  10123. case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
  10124. IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16));
  10125. NumEltsC = 4;
  10126. NumEltsD = 4;
  10127. break;
  10128. case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
  10129. IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16));
  10130. NumEltsC = 4;
  10131. NumEltsD = 8;
  10132. break;
  10133. case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
  10134. IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32));
  10135. NumEltsC = 8;
  10136. NumEltsD = 4;
  10137. break;
  10138. case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
  10139. IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32));
  10140. NumEltsC = 8;
  10141. NumEltsD = 8;
  10142. break;
  10143. default:
  10144. llvm_unreachable("Unexpected builtin ID.");
  10145. }
  10146. #undef MMA_VARIANTS
  10147. SmallVector<Value *, 24> Values;
  10148. Function *Intrinsic = CGM.getIntrinsic(IID);
  10149. llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0);
  10150. // Load A
  10151. for (unsigned i = 0; i < 8; ++i) {
  10152. Value *V = Builder.CreateAlignedLoad(
  10153. Builder.CreateGEP(SrcA.getPointer(),
  10154. llvm::ConstantInt::get(IntTy, i)),
  10155. CharUnits::fromQuantity(4));
  10156. Values.push_back(Builder.CreateBitCast(V, ABType));
  10157. }
  10158. // Load B
  10159. for (unsigned i = 0; i < 8; ++i) {
  10160. Value *V = Builder.CreateAlignedLoad(
  10161. Builder.CreateGEP(SrcB.getPointer(),
  10162. llvm::ConstantInt::get(IntTy, i)),
  10163. CharUnits::fromQuantity(4));
  10164. Values.push_back(Builder.CreateBitCast(V, ABType));
  10165. }
  10166. // Load C
  10167. llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16);
  10168. for (unsigned i = 0; i < NumEltsC; ++i) {
  10169. Value *V = Builder.CreateAlignedLoad(
  10170. Builder.CreateGEP(SrcC.getPointer(),
  10171. llvm::ConstantInt::get(IntTy, i)),
  10172. CharUnits::fromQuantity(4));
  10173. Values.push_back(Builder.CreateBitCast(V, CType));
  10174. }
  10175. Value *Result = Builder.CreateCall(Intrinsic, Values);
  10176. llvm::Type *DType = Dst.getElementType();
  10177. for (unsigned i = 0; i < NumEltsD; ++i)
  10178. Builder.CreateAlignedStore(
  10179. Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
  10180. Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
  10181. CharUnits::fromQuantity(4));
  10182. return Result;
  10183. }
  10184. default:
  10185. return nullptr;
  10186. }
  10187. }
  10188. Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
  10189. const CallExpr *E) {
  10190. switch (BuiltinID) {
  10191. case WebAssembly::BI__builtin_wasm_mem_size: {
  10192. llvm::Type *ResultType = ConvertType(E->getType());
  10193. Value *I = EmitScalarExpr(E->getArg(0));
  10194. Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType);
  10195. return Builder.CreateCall(Callee, I);
  10196. }
  10197. case WebAssembly::BI__builtin_wasm_mem_grow: {
  10198. llvm::Type *ResultType = ConvertType(E->getType());
  10199. Value *Args[] = {
  10200. EmitScalarExpr(E->getArg(0)),
  10201. EmitScalarExpr(E->getArg(1))
  10202. };
  10203. Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType);
  10204. return Builder.CreateCall(Callee, Args);
  10205. }
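// Note that both mem_size and mem_grow fetch the intrinsic overloaded on the
// builtin's result type (ConvertType(E->getType())), so the emitted call
// follows whatever index width the frontend assigns to these builtins.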
  10206. case WebAssembly::BI__builtin_wasm_current_memory: {
  10207. llvm::Type *ResultType = ConvertType(E->getType());
  10208. Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
  10209. return Builder.CreateCall(Callee);
  10210. }
  10211. case WebAssembly::BI__builtin_wasm_grow_memory: {
  10212. Value *X = EmitScalarExpr(E->getArg(0));
  10213. Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
  10214. return Builder.CreateCall(Callee, X);
  10215. }
  10216. case WebAssembly::BI__builtin_wasm_throw: {
  10217. Value *Tag = EmitScalarExpr(E->getArg(0));
  10218. Value *Obj = EmitScalarExpr(E->getArg(1));
  10219. Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
  10220. return Builder.CreateCall(Callee, {Tag, Obj});
  10221. }
  10222. case WebAssembly::BI__builtin_wasm_rethrow: {
  10223. Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
  10224. return Builder.CreateCall(Callee);
  10225. }
  10226. default:
  10227. return nullptr;
  10228. }
  10229. }
  10230. Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
  10231. const CallExpr *E) {
  10232. SmallVector<llvm::Value *, 4> Ops;
  10233. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  10234. auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
  10235. // The base pointer is passed by address, so it needs to be loaded.
  10236. Address BP = EmitPointerWithAlignment(E->getArg(0));
  10237. BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
  10238. BP.getAlignment());
  10239. llvm::Value *Base = Builder.CreateLoad(BP);
  10240. // Operands are Base, Increment, Modifier, Start.
  10241. if (HasImm)
  10242. Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
  10243. EmitScalarExpr(E->getArg(3)) };
  10244. else
  10245. Ops = { Base, EmitScalarExpr(E->getArg(1)),
  10246. EmitScalarExpr(E->getArg(2)) };
  10247. llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
  10248. llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1);
  10249. llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
  10250. NewBase->getType()->getPointerTo());
  10251. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  10252. // The intrinsic generates two results. The new value for the base pointer
  10253. // needs to be stored.
  10254. Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
  10255. return Builder.CreateExtractValue(Result, 0);
  10256. };
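// In effect the circular-load intrinsics behave like a structure return:
// element 0 of the result is the loaded value (the builtin's result) and
// element 1 is the post-increment base pointer, written back through the
// by-address first argument.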
  10257. auto MakeCircSt = [&](unsigned IntID, bool HasImm) {
  10258. // The base pointer is passed by address, so it needs to be loaded.
  10259. Address BP = EmitPointerWithAlignment(E->getArg(0));
  10260. BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
  10261. BP.getAlignment());
  10262. llvm::Value *Base = Builder.CreateLoad(BP);
  10263. // Operands are Base, Increment, Modifier, Value, Start.
  10264. if (HasImm)
  10265. Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
  10266. EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) };
  10267. else
  10268. Ops = { Base, EmitScalarExpr(E->getArg(1)),
  10269. EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) };
  10270. llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
  10271. llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
  10272. NewBase->getType()->getPointerTo());
  10273. Address Dest = EmitPointerWithAlignment(E->getArg(0));
  10274. // The intrinsic generates one result, which is the new value for the base
  10275. // pointer. It needs to be stored.
  10276. return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
  10277. };
10278. // Handle the conversion of bit-reverse load intrinsics to bit code.
10279. // The intrinsic call emitted here only reads from memory, and the
10280. // write to memory is handled by the store instruction below.
  10281. auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) {
10282. // The intrinsic generates one result, which is the new value for the base
10283. // pointer. It needs to be returned. The loaded value is handed back to the
10284. // caller through the by-address Dest argument, so it needs to be stored.
  10285. llvm::Value *BaseAddress =
  10286. Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
  10287. // Expressions like &(*pt++) will be incremented per evaluation.
10288. // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
  10289. // per call.
  10290. Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
  10291. DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
  10292. DestAddr.getAlignment());
  10293. llvm::Value *DestAddress = DestAddr.getPointer();
  10294. // Operands are Base, Dest, Modifier.
  10295. // The intrinsic format in LLVM IR is defined as
  10296. // { ValueType, i8* } (i8*, i32).
  10297. Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))};
  10298. llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
  10299. // The value needs to be stored as the variable is passed by reference.
  10300. llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
10301. // The value needs to be truncated to fit the destination type.
10302. // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
10303. // to be handled with stores of the respective destination type.
  10304. DestVal = Builder.CreateTrunc(DestVal, DestTy);
  10305. llvm::Value *DestForStore =
  10306. Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
  10307. Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
  10308. // The updated value of the base pointer is returned.
  10309. return Builder.CreateExtractValue(Result, 1);
  10310. };
  10311. switch (BuiltinID) {
  10312. case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
  10313. case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
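// The third argument is the carry predicate, passed by address: it is loaded
// as a vector of i1 carry-ins before the call, and the carry-out produced by
// the intrinsic (element 1 of the result) is stored back through it, while
// element 0, the sum vector, becomes the builtin's result.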
  10314. Address Dest = EmitPointerWithAlignment(E->getArg(2));
  10315. unsigned Size;
  10316. if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
  10317. Size = 512;
  10318. ID = Intrinsic::hexagon_V6_vaddcarry;
  10319. } else {
  10320. Size = 1024;
  10321. ID = Intrinsic::hexagon_V6_vaddcarry_128B;
  10322. }
  10323. Dest = Builder.CreateBitCast(Dest,
  10324. llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
  10325. LoadInst *QLd = Builder.CreateLoad(Dest);
  10326. Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
  10327. llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  10328. llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
  10329. llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
  10330. Vprd->getType()->getPointerTo(0));
  10331. Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
  10332. return Builder.CreateExtractValue(Result, 0);
  10333. }
  10334. case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
  10335. case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
  10336. Address Dest = EmitPointerWithAlignment(E->getArg(2));
  10337. unsigned Size;
  10338. if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
  10339. Size = 512;
  10340. ID = Intrinsic::hexagon_V6_vsubcarry;
  10341. } else {
  10342. Size = 1024;
  10343. ID = Intrinsic::hexagon_V6_vsubcarry_128B;
  10344. }
  10345. Dest = Builder.CreateBitCast(Dest,
  10346. llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
  10347. LoadInst *QLd = Builder.CreateLoad(Dest);
  10348. Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
  10349. llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  10350. llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
  10351. llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
  10352. Vprd->getType()->getPointerTo(0));
  10353. Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
  10354. return Builder.CreateExtractValue(Result, 0);
  10355. }
  10356. case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
  10357. return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
  10358. case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
  10359. return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true);
  10360. case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
  10361. return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true);
  10362. case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
  10363. return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true);
  10364. case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
  10365. return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true);
  10366. case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
  10367. return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true);
  10368. case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
  10369. return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false);
  10370. case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
  10371. return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false);
  10372. case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
  10373. return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false);
  10374. case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
  10375. return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false);
  10376. case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
  10377. return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false);
  10378. case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
  10379. return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false);
  10380. case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
  10381. return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true);
  10382. case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
  10383. return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true);
  10384. case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
  10385. return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true);
  10386. case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
  10387. return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true);
  10388. case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
  10389. return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true);
  10390. case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
  10391. return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false);
  10392. case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
  10393. return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false);
  10394. case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
  10395. return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false);
  10396. case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
  10397. return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false);
  10398. case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
  10399. return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false);
  10400. case Hexagon::BI__builtin_brev_ldub:
  10401. return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
  10402. case Hexagon::BI__builtin_brev_ldb:
  10403. return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
  10404. case Hexagon::BI__builtin_brev_lduh:
  10405. return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
  10406. case Hexagon::BI__builtin_brev_ldh:
  10407. return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
  10408. case Hexagon::BI__builtin_brev_ldw:
  10409. return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
  10410. case Hexagon::BI__builtin_brev_ldd:
  10411. return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
  10412. default:
  10413. break;
  10414. } // switch
  10415. return nullptr;
  10416. }