//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>

using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
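
// Illustrative note (editorial, not from the upstream source): if the caller
// is declared 'signext i8 @f()', the sign extension of the return value is
// part of the caller's ABI contract, so a tail call to a callee that does not
// promise the same extension could leave garbage in the high bits. That is
// why the ZExt/SExt check above conservatively rejects the tail position.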

bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    unsigned Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }

  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamAlignment(ArgIdx);
  ByValType = Call->getParamByValType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops, bool isSigned,
                            const SDLoc &dl, bool doesNotReturn,
                            bool isReturnValueUsed,
                            bool isPostTypeLegalization) const {
  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (SDValue Op : Ops) {
    Entry.Node = Op;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
    Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(doesNotReturn)
      .setDiscardResult(!isReturnValueUsed)
      .setIsPostTypeLegalization(isPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(!signExtend);
  return LowerCallTo(CLI);
}
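
// Illustrative use of makeLibCall (editorial sketch; it mirrors the calls
// made elsewhere in this file, e.g. in softenSetCCOperands below):
//   SDValue Ops[2] = {LHS, RHS};
//   SDValue Result =
//       TLI.makeLibCall(DAG, RTLIB::ADD_F64, MVT::f64, Ops,
//                       /*isSigned=*/false, dl).first;
// The .first of the returned pair is the call's result value; .second is the
// output chain produced by LowerCallTo.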

bool
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                         unsigned Limit, uint64_t Size,
                                         unsigned DstAlign, unsigned SrcAlign,
                                         bool IsMemset,
                                         bool ZeroMemset,
                                         bool MemcpyStrSrc,
                                         bool AllowOverlap,
                                         unsigned DstAS, unsigned SrcAS,
                                         const AttributeList &FuncAttributes) const {
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
    return false;

  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
                               IsMemset, ZeroMemset, MemcpyStrSrc,
                               FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater than
    // or equal to DstAlign (or zero).
    VT = MVT::i64;
    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
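
// Illustrative example (editorial; assumes a target whose widest legal
// integer type is i64): for a 15-byte copy, the loops above would typically
// produce MemOps = {i64, i32, i16, i8}. If AllowOverlap is set and the
// target reports fast misaligned accesses, the trailing pieces can instead
// be covered by a second, overlapping i64 operation.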

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl) const {
  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETO:
    LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
          (VT == MVT::f64) ? RTLIB::O_F64 :
          (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = SETOLT | SETOGT
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }
  // Use the target-specific return value for comparison libcalls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
                       dl).first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC)
    CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);

  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
    SDValue Tmp = DAG.getNode(
        ISD::SETCC, dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
        NewLHS, NewRHS, DAG.getCondCode(CCCode));
    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false /*sign irrelevant*/,
                         dl).first;
    NewLHS = DAG.getNode(
        ISD::SETCC, dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
        NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
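
// Illustrative example (editorial; assumes the default comparison libcall
// names): softening 'setcc f32 %x, %y, setueq' selects LC1 = UO_F32 and
// LC2 = OEQ_F32, i.e. calls to __unordsf2 and __eqsf2 whose integer results
// are each compared against zero and then OR'ed together as above.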

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, Demanded, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C)
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(Demanded)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
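
// Illustrative example (editorial): if Op is (and X, 0xFF) but Demanded is
// only 0x0F, the constant is not a subset of the demanded bits, so the code
// above rewrites the node to (and X, 0x0F).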

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
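
// Illustrative example (editorial; assumes a target where i32 <-> i64 casts
// are free): if only the low 8 bits of an i64 'add' are demanded, the loop
// above tries i8, then i16, then i32, and rewrites the node as
// (any_extend (add (trunc X), (trunc Y))) at the first width whose truncate
// and zero-extend are both free.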

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}
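
// Editorial note: the two wrappers above supply defaults for the full form
// below; in particular, the second one demands every vector element (or a
// single pseudo-element for scalars), so callers that already know which
// lanes matter should use the DemandedElts overload directly.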

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!Op.getValueType().isVector() ||
          NumElts == Op.getValueType().getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Don't know anything.
  Known = KnownBits(BitWidth);

  // Undef operand.
  if (Op.isUndef())
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
    Known.Zero = ~Known.One;
    return false;
  }

  // Other users may use these bits.
  EVT VT = Op.getValueType();
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
    if (Depth != 0) {
      // If not at the root, just compute the Known bits to
      // simplify things downstream.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnesValue(BitWidth);
    DemandedElts = APInt::getAllOnesValue(NumElts);
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth == 6) { // Limit search depth.
    return false;
  }

  KnownBits Known2, KnownOut;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;
    Known = SrcKnown.zextOrTrunc(BitWidth, false);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every constant vector element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (SDValue SrcOp : Op->ops()) {
      if (!isa<ConstantSDNode>(SrcOp)) {
        // We can only handle all constant values - bail out with no known bits.
        Known = KnownBits(BitWidth);
        return false;
      }
      Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
      Known2.Zero = ~Known2.One;

      // BUILD_VECTOR can implicitly truncate sources, we must handle this.
      if (Known2.One.getBitWidth() != BitWidth) {
        assert(Known2.getBitWidth() > BitWidth &&
               "Expected BUILD_VECTOR implicit truncation");
        Known2 = Known2.trunc(BitWidth);
      }

      // Known bits are the values that are shared by every element.
      // TODO: support per-element known bits.
      Known.One &= Known2.One;
      Known.Zero &= Known2.Zero;
    }
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(OriginalDemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!OriginalDemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = OriginalDemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.zextOrTrunc(BitWidth, false);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts,
                             KnownVec, TLO, Depth + 1))
      return true;

    if (!!DemandedVecElts) {
      Known.One &= KnownVec.One;
      Known.Zero &= KnownVec.Zero;
    }

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue Base = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    EVT SubVT = Sub.getValueType();
    unsigned NumSubElts = SubVT.getVectorNumElements();

    // If index isn't constant, assume we need the original demanded base
    // elements and ALL the inserted subvector elements.
    APInt BaseElts = DemandedElts;
    APInt SubElts = APInt::getAllOnesValue(NumSubElts);
    if (isa<ConstantSDNode>(Op.getOperand(2))) {
      const APInt &Idx = Op.getConstantOperandAPInt(2);
      if (Idx.ule(NumElts - NumSubElts)) {
        unsigned SubIdx = Idx.getZExtValue();
        SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
        BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
      }
    }

    KnownBits KnownSub, KnownBase;
    if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!SubElts) {
      Known.One &= KnownSub.One;
      Known.Zero &= KnownSub.Zero;
    }
    if (!!BaseElts) {
      Known.One &= KnownBase.One;
      Known.Zero &= KnownBase.Zero;
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts) {
        Known.One &= Known2.One;
        Known.Zero &= Known2.Zero;
      }
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;
      int M = ShuffleMask[i];
      if (M < 0) {
        // For UNDEF elements, we don't know anything about the common state of
        // the shuffle result.
        DemandedLHS.clearAllBits();
        DemandedRHS.clearAllBits();
        break;
      }
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    if (!!DemandedLHS || !!DemandedRHS) {
      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS,
                                 Known2, TLO, Depth + 1))
          return true;
        Known.One &= Known2.One;
        Known.Zero &= Known2.Zero;
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS,
                                 Known2, TLO, Depth + 1))
          return true;
        Known.One &= Known2.One;
        Known.Zero &= Known2.Zero;
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);

      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Output known-1 bits are only known if set in both the LHS & RHS.
    Known.One &= Known2.One;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    Known.Zero |= Known2.Zero;
    break;
  }
  789. case ISD::OR: {
  790. SDValue Op0 = Op.getOperand(0);
  791. SDValue Op1 = Op.getOperand(1);
  792. if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
  793. Depth + 1))
  794. return true;
  795. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  796. if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
  797. Known2, TLO, Depth + 1))
  798. return true;
  799. assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
  800. // If all of the demanded bits are known zero on one side, return the other.
  801. // These bits cannot contribute to the result of the 'or'.
  802. if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
  803. return TLO.CombineTo(Op, Op0);
  804. if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
  805. return TLO.CombineTo(Op, Op1);
  806. // If the RHS is a constant, see if we can simplify it.
  807. if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
  808. return true;
  809. // If the operation can be done in a smaller type, do so.
  810. if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
  811. return true;
  812. // Output known-0 bits are only known if clear in both the LHS & RHS.
  813. Known.Zero &= Known2.Zero;
  814. // Output known-1 are known to be set if set in either the LHS | RHS.
  815. Known.One |= Known2.One;
  816. break;
  817. }
  818. case ISD::XOR: {
  819. SDValue Op0 = Op.getOperand(0);
  820. SDValue Op1 = Op.getOperand(1);
  821. if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
  822. Depth + 1))
  823. return true;
  824. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  825. if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
  826. Depth + 1))
  827. return true;
  828. assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
  829. // If all of the demanded bits are known zero on one side, return the other.
  830. // These bits cannot contribute to the result of the 'xor'.
  831. if (DemandedBits.isSubsetOf(Known.Zero))
  832. return TLO.CombineTo(Op, Op0);
  833. if (DemandedBits.isSubsetOf(Known2.Zero))
  834. return TLO.CombineTo(Op, Op1);
  835. // If the operation can be done in a smaller type, do so.
  836. if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
  837. return true;
  838. // If all of the unknown bits are known to be zero on one side or the other
  839. // (but not both) turn this into an *inclusive* or.
  840. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
  841. if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
  842. return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
  843. // Output known-0 bits are known if clear or set in both the LHS & RHS.
  844. KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
  845. // Output known-1 are known to be set if set in only one of the LHS, RHS.
  846. KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
  847. if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
  848. // If one side is a constant, and all of the known set bits on the other
  849. // side are also set in the constant, turn this into an AND, as we know
  850. // the bits will be cleared.
  851. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
  852. // NB: it is okay if more bits are known than are requested
  853. if (C->getAPIntValue() == Known2.One) {
  854. SDValue ANDC =
  855. TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
  856. return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
  857. }
  858. // If the RHS is a constant, see if we can change it. Don't alter a -1
  859. // constant because that's a 'not' op, and that is better for combining
  860. // and codegen.
  861. if (!C->isAllOnesValue()) {
  862. if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
  863. // We're flipping all demanded bits. Flip the undemanded bits too.
  864. SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
  865. return TLO.CombineTo(Op, New);
  866. }
  867. // If we can't turn this into a 'not', try to shrink the constant.
  868. if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
  869. return true;
  870. }
  871. }
  872. Known = std::move(KnownOut);
  873. break;
  874. }
  875. case ISD::SELECT:
  876. if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
  877. Depth + 1))
  878. return true;
  879. if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
  880. Depth + 1))
  881. return true;
  882. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  883. assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
  884. // If the operands are constants, see if we can simplify them.
  885. if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
  886. return true;
  887. // Only known if known in both the LHS and RHS.
  888. Known.One &= Known2.One;
  889. Known.Zero &= Known2.Zero;
  890. break;
  891. case ISD::SELECT_CC:
  892. if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
  893. Depth + 1))
  894. return true;
  895. if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
  896. Depth + 1))
  897. return true;
  898. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  899. assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
  900. // If the operands are constants, see if we can simplify them.
  901. if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
  902. return true;
  903. // Only known if known in both the LHS and RHS.
  904. Known.One &= Known2.One;
  905. Known.Zero &= Known2.Zero;
  906. break;
  907. case ISD::SETCC: {
  908. SDValue Op0 = Op.getOperand(0);
  909. SDValue Op1 = Op.getOperand(1);
  910. ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  911. // If (1) we only need the sign-bit, (2) the setcc operands are the same
  912. // width as the setcc result, and (3) the result of a setcc conforms to 0 or
  913. // -1, we may be able to bypass the setcc.
  914. if (DemandedBits.isSignMask() &&
  915. Op0.getScalarValueSizeInBits() == BitWidth &&
  916. getBooleanContents(VT) ==
  917. BooleanContent::ZeroOrNegativeOneBooleanContent) {
  918. // If we're testing X < 0, then this compare isn't needed - just use X!
  919. // FIXME: We're limiting to integer types here, but this should also work
  920. // if we don't care about FP signed-zero. The use of SETLT with FP means
  921. // that we don't care about NaNs.
  922. if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
  923. (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
  924. return TLO.CombineTo(Op, Op0);
  925. // TODO: Should we check for other forms of sign-bit comparisons?
  926. // Examples: X <= -1, X >= 0
  927. }
  928. if (getBooleanContents(Op0.getValueType()) ==
  929. TargetLowering::ZeroOrOneBooleanContent &&
  930. BitWidth > 1)
  931. Known.Zero.setBitsFrom(1);
  932. break;
  933. }
  934. case ISD::SHL: {
  935. SDValue Op0 = Op.getOperand(0);
  936. SDValue Op1 = Op.getOperand(1);
  937. if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
  938. // If the shift count is an invalid immediate, don't do anything.
  939. if (SA->getAPIntValue().uge(BitWidth))
  940. break;
  941. unsigned ShAmt = SA->getZExtValue();
  942. // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
  943. // single shift. We can do this if the bottom bits (which are shifted
  944. // out) are never demanded.
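      // For example (illustrative): if the low 4 bits of i32
      // ((X >>u 8) << 4) are never demanded, the expression agrees with
      // (X >>u 4) on every demanded bit, so a single shift of |ShAmt - C1|
      // in the appropriate direction suffices.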
      if (Op0.getOpcode() == ISD::SRL) {
        if (ShAmt &&
            (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
          if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
            if (SA2->getAPIntValue().ult(BitWidth)) {
              unsigned C1 = SA2->getZExtValue();
              unsigned Opc = ISD::SHL;
              int Diff = ShAmt - C1;
              if (Diff < 0) {
                Diff = -Diff;
                Opc = ISD::SRL;
              }
              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
            }
          }
        }
      }

      if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
                               Known, TLO, Depth + 1))
        return true;

      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if ((ShAmt < DemandedBits.getActiveBits()) &&
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }
        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (ConstantSDNode *SA2 =
                  isConstOrConstSplat(InnerOp.getOperand(1))) {
            unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countTrailingZeros() >= ShAmt) {
              SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
                                                  Op1.getValueType());
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);
    }
    break;
  }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
      if (Op0.getOpcode() == ISD::SHL) {
        if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
          if (ShAmt &&
              (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
            if (SA2->getAPIntValue().ult(BitWidth)) {
              unsigned C1 = SA2->getZExtValue();
              unsigned Opc = ISD::SRL;
              int Diff = ShAmt - C1;
              if (Diff < 0) {
                Diff = -Diff;
                Opc = ISD::SHL;
              }
              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
            }
          }
        }
      }

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      Known.Zero.setHighBits(ShAmt); // High bits known zero.
    }
    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOneValue())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
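        // For example (illustrative): for i32 (X s>> 24) with only bit 30
        // demanded, that bit is a sign-fill copy of X's sign bit, so
        // (X >>u (31 - 30)) produces the same demanded bit.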
        SDValue NewSA =
            TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op2)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
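      // For example (illustrative): fshl i8 X, Y, 3 == (X << 3) | (Y >> 5),
      // so demanded bit i of the result pulls bit (i - 3) from X and bit
      // (i + 5) from Y; hence Demanded0 = DemandedBits >> 3 and
      // Demanded1 = DemandedBits << 5 below.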
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.One |= Known2.One;
      Known.Zero |= Known2.Zero;
    }
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
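    // For example (illustrative): for (sext_inreg X, i8) in an i32 where only
    // bit 31 is demanded, (X << 24) already places the i8 sign bit (bit 7)
    // at bit 31, so no right shift is needed.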
    if (DemandedBits.isSignMask()) {
      unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
      bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        EVT ShiftAmtTy = VT;
        if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);

        SDValue ShiftAmt =
            TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(
          Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
  case ISD::ZERO_EXTEND: {
    SDValue Src = Op.getOperand(0);
    unsigned InBits = Src.getScalarValueSizeInBits();

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
    break;
  }
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    // TODO - merge this with ZERO_EXTEND above?
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.getVectorNumElements();

    // If we only need the non-extended bits of the bottom element
    // then we can just bitcast to the result.
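    // For example (illustrative): on a little-endian target,
    // zext_vector_inreg from <4 x i16> to <2 x i32> keeps src element 0 in
    // the low 16 bits of result element 0, so when only those bits (and only
    // element 0) are demanded the node is just a bitcast of the source.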
    if (DemandedBits.getActiveBits() <= InBits && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits() &&
        TLO.DAG.getDataLayout().isLittleEndian())
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
    break;
  }
  case ISD::SIGN_EXTEND: {
    SDValue Src = Op.getOperand(0);
    unsigned InBits = Src.getScalarValueSizeInBits();

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
    break;
  }
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
    // TODO - merge this with SIGN_EXTEND above?
    SDValue Src = Op.getOperand(0);
    unsigned InBits = Src.getScalarValueSizeInBits();

    APInt InDemandedBits = DemandedBits.trunc(InBits);

    // If some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if (InBits < DemandedBits.getActiveBits())
      InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);
    break;
  }
  case ISD::ANY_EXTEND: {
    SDValue Src = Op.getOperand(0);
    unsigned InBits = Src.getScalarValueSizeInBits();
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    Known = Known.zext(BitWidth, false /* => any extend */);
    break;
  }
  case ISD::TRUNCATE: {
    SDValue Src = Op.getOperand(0);

    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
      return true;
    Known = Known.trunc(BitWidth);

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Src.getNode()->hasOneUse()) {
      switch (Src.getOpcode()) {
      default:
        break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
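        // For example (illustrative): for i32 (trunc i64 (X >>u 8)), result
        // bits 24-31 come from X's upper half; if those bits are not demanded
        // the node can become ((trunc X) >>u 8) in the narrow type.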
        if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
          // undesirable.
          break;

        auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
        if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
          break;

        SDValue Shift = Src.getOperand(1);
        uint64_t ShVal = ShAmt->getZExtValue();

        if (TLO.LegalTypes())
          Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));

        APInt HighBits =
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShVal);
        HighBits = HighBits.trunc(BitWidth);

        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed. Add a truncate of the
          // shift input, then shift it.
          SDValue NewTrunc =
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
        }
        break;
      }
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
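    // For example (illustrative): (AssertZext X, i8) on an i32 value asserts
    // that bits 8-31 are zero; keeping those high bits demanded on the operand
    // prevents a simplification that would invalidate the assertion.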
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
                             TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    Known.Zero |= ~InMask;
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(0);
    SDValue Idx = Op.getOperand(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();

    // Demand the bits from every vector element without a constant index.
    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
      if (CIdx->getAPIntValue().ult(NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);

    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
                             Depth + 1))
      return true;

    Known = Known2;
    if (BitWidth > EltBitWidth)
      Known = Known.zext(BitWidth, false /* => any extend */);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place. We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    // TODO - bigendian once we have test coverage.
    // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
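    // For example (illustrative): bitcasting <4 x i16> to <2 x i32> on a
    // little-endian target gives Scale == 2; the low/high 16-bit halves of
    // each demanded i32 element j map to source elements (2 * j) and
    // (2 * j + 1) respectively.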
    if (SrcVT.isVector() && NumSrcEltBits > 1 &&
        (BitWidth % NumSrcEltBits) == 0 &&
        TLO.DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                     KnownSrcZero, TLO, Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    } else if ((NumSrcEltBits % BitWidth) == 0 &&
               TLO.DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                       KnownSrcZero, TLO, Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    }

    // If this is a bitcast, let computeKnownBits handle it. Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
  case ISD::ADD:
  case ISD::MUL:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
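    // For example (illustrative): if DemandedBits for an i32 add is 0xFF,
    // carries only propagate upward, so bits 8-31 of both operands are
    // irrelevant and LoMask below becomes 0x000000FF.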
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
    unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
      SDNodeFlags Flags = Op.getNode()->getFlags();
      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
      return true;
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
    // is probably not useful (and could be detrimental).
    ConstantSDNode *C = isConstOrConstSplat(Op1);
    APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
    if (C && !C->isAllOnesValue() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnesValue()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
      // We can't guarantee that the new math op doesn't wrap, so explicitly
      // clear those flags to prevent folding with a potential existing node
      // that has those flags set.
      SDNodeFlags Flags;
      Flags.setNoSignedWrap(false);
      Flags.setNoUnsignedWrap(false);
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
      return TLO.CombineTo(Op, NewOp);
    }

    LLVM_FALLTHROUGH;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
                                            Known, TLO, Depth))
        return true;
      break;
    }

    // Just use computeKnownBits to compute output bits.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
    // Avoid folding to a constant if any OpaqueConstant is involved.
    const SDNode *N = Op.getNode();
    for (SDNodeIterator I = SDNodeIterator::begin(N),
                        E = SDNodeIterator::end(N);
         I != E; ++I) {
      SDNode *Op = *I;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->isOpaque())
          return false;
    }
    // TODO: Handle float bits as well.
    if (VT.isInteger())
      return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
  }

  return false;
}

bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
                                                const APInt &DemandedElts,
                                                APInt &KnownUndef,
                                                APInt &KnownZero,
                                                DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  bool Simplified =
      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

/// Given a vector binary operation and known undefined elements for each input
/// operand, compute whether each element of the output is undefined.
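/// For example (illustrative): for a <2 x i32> add where element 1 of operand
/// 0 is undef and element 1 of operand 1 is a non-opaque constant, getNode()
/// folds add(undef, C) to undef, so bit 1 is set in the returned mask.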
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
                                         const APInt &UndefOp0,
                                         const APInt &UndefOp1) {
  EVT VT = BO.getValueType();
  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
         "Vector binop only");

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  assert(UndefOp0.getBitWidth() == NumElts &&
         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");

  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
                                   const APInt &UndefVals) {
    if (UndefVals[Index])
      return DAG.getUNDEF(EltVT);

    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
      // Try hard to make sure that the getNode() call is not creating temporary
      // nodes. Ignore opaque integers because they do not constant fold.
      SDValue Elt = BV->getOperand(Index);
      auto *C = dyn_cast<ConstantSDNode>(Elt);
      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
        return Elt;
    }

    return SDValue();
  };

  APInt KnownUndef = APInt::getNullValue(NumElts);
  for (unsigned i = 0; i != NumElts; ++i) {
    // If both inputs for this element are either constant or undef and match
    // the element type, compute the constant/undef result for this element of
    // the vector.
    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
    // not handle FP constants. The code within getNode() should be refactored
    // to avoid the danger of creating a bogus temporary node here.
    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
        KnownUndef.setBit(i);
  }
  return KnownUndef;
}

bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = DemandedEltMask;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");
  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  KnownUndef = KnownZero = APInt::getNullValue(NumElts);

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= 6)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcZero, SrcUndef;
    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);

    // Bitcast from 'large element' src vector to 'small element' vector: we
    // must demand a source element if any DemandedElt maps to it.
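    // For example (illustrative): bitcasting <2 x i64> to <4 x i32> gives
    // Scale == 2, so demanding result element 3 demands source element 1.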
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBit(i / Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
      // of the large element.
      // TODO - bigendian once we have test coverage.
      if (TLO.DAG.getDataLayout().isLittleEndian()) {
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
        APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
        for (unsigned i = 0; i != NumElts; ++i)
          if (DemandedElts[i]) {
            unsigned Ofs = (i % Scale) * EltSizeInBits;
            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
          }

        KnownBits Known;
        if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
          return true;
      }

      // If a src element is zero/undef then all the output elements covering
      // it will be as well - but only demanded elements are guaranteed to be
      // correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnesValue()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (!isa<ConstantSDNode>(Op.getOperand(2)))
      break;
    SDValue Base = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    EVT SubVT = Sub.getValueType();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    const APInt &Idx = Op.getConstantOperandAPInt(2);
    if (Idx.ugt(NumElts - NumSubElts))
      break;
    unsigned SubIdx = Idx.getZExtValue();
    APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;
    APInt BaseElts = DemandedElts;
    BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
    if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, SubIdx);
    KnownZero.insertBits(SubZero, SubIdx);
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    SDValue Src = Op.getOperand(0);
    ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
      // Offset the demanded elts by the subvector index.
      uint64_t Idx = SubIdx->getZExtValue();
      APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
      APInt SrcUndef, SrcZero;
      if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef = SrcUndef.extractBits(NumElts, Idx);
      KnownZero = SrcZero.extractBits(NumElts, Idx);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));

    // For a legal, constant insertion index, if we don't need this insertion
    // then strip it, else remove it from the demanded elts.
    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
      unsigned Idx = CIdx->getZExtValue();
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);

      APInt DemandedVecElts(DemandedElts);
      DemandedVecElts.clearBit(Idx);
      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
                                     KnownZero, TLO, Depth + 1))
        return true;

      KnownUndef.clearBit(Idx);
      if (Scl.isUndef())
        KnownUndef.setBit(Idx);

      KnownZero.clearBit(Idx);
      if (isNullConstant(Scl) || isNullFPConstant(Scl))
        KnownZero.setBit(Idx);
      break;
    }

    APInt VecUndef, VecZero;
    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
                                   Depth + 1))
      return true;
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
    break;
  }
  case ISD::VSELECT: {
    // Try to transform the select condition based on the current demanded
    // elements.
    // TODO: If a condition element is undef, we can choose from one arm of the
    // select (and if one arm is undef, then we can propagate that to the
    // result).
    // TODO - add support for constant vselect masks (see IR version of this).
    APInt UnusedUndef, UnusedZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
                                   UnusedZero, TLO, Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    KnownUndef = UndefLHS & UndefRHS;
    KnownZero = ZeroLHS & ZeroRHS;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
    // Collect demanded elements from shuffle operands.
  1871. APInt DemandedLHS(NumElts, 0);
  1872. APInt DemandedRHS(NumElts, 0);
  1873. for (unsigned i = 0; i != NumElts; ++i) {
  1874. int M = ShuffleMask[i];
  1875. if (M < 0 || !DemandedElts[i])
  1876. continue;
  1877. assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
  1878. if (M < (int)NumElts)
  1879. DemandedLHS.setBit(M);
  1880. else
  1881. DemandedRHS.setBit(M - NumElts);
  1882. }
  1883. // See if we can simplify either shuffle operand.
  1884. APInt UndefLHS, ZeroLHS;
  1885. APInt UndefRHS, ZeroRHS;
  1886. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
  1887. ZeroLHS, TLO, Depth + 1))
  1888. return true;
  1889. if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
  1890. ZeroRHS, TLO, Depth + 1))
  1891. return true;
  1892. // Simplify mask using undef elements from LHS/RHS.
  1893. bool Updated = false;
  1894. bool IdentityLHS = true, IdentityRHS = true;
  1895. SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
  1896. for (unsigned i = 0; i != NumElts; ++i) {
  1897. int &M = NewMask[i];
  1898. if (M < 0)
  1899. continue;
  1900. if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
  1901. (M >= (int)NumElts && UndefRHS[M - NumElts])) {
  1902. Updated = true;
  1903. M = -1;
  1904. }
  1905. IdentityLHS &= (M < 0) || (M == (int)i);
  1906. IdentityRHS &= (M < 0) || ((M - NumElts) == i);
  1907. }
  1908. // Update legal shuffle masks based on demanded elements if it won't reduce
  1909. // to Identity which can cause premature removal of the shuffle mask.
  1910. if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
  1911. isShuffleMaskLegal(NewMask, VT))
  1912. return TLO.CombineTo(Op,
  1913. TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
  1914. Op.getOperand(1), NewMask));
  1915. // Propagate undef/zero elements from LHS/RHS.
  1916. for (unsigned i = 0; i != NumElts; ++i) {
  1917. int M = ShuffleMask[i];
  1918. if (M < 0) {
  1919. KnownUndef.setBit(i);
  1920. } else if (M < (int)NumElts) {
  1921. if (UndefLHS[M])
  1922. KnownUndef.setBit(i);
  1923. if (ZeroLHS[M])
  1924. KnownZero.setBit(i);
  1925. } else {
  1926. if (UndefRHS[M - NumElts])
  1927. KnownUndef.setBit(i);
  1928. if (ZeroRHS[M - NumElts])
  1929. KnownZero.setBit(i);
  1930. }
  1931. }
  1932. break;
  1933. }
  1934. case ISD::SIGN_EXTEND_VECTOR_INREG:
  1935. case ISD::ZERO_EXTEND_VECTOR_INREG: {
  1936. APInt SrcUndef, SrcZero;
  1937. SDValue Src = Op.getOperand(0);
  1938. unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
  1939. APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
  1940. if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
  1941. Depth + 1))
  1942. return true;
  1943. KnownZero = SrcZero.zextOrTrunc(NumElts);
  1944. KnownUndef = SrcUndef.zextOrTrunc(NumElts);
  1945. if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
  1946. // zext(undef) upper bits are guaranteed to be zero.
  1947. if (DemandedElts.isSubsetOf(KnownUndef))
  1948. return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
  1949. KnownUndef.clearAllBits();
  1950. }
  1951. break;
  1952. }
  1953. // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
  1954. // MAX, saturated math, etc.
  1955. case ISD::OR:
  1956. case ISD::XOR:
  1957. case ISD::ADD:
  1958. case ISD::SUB:
  1959. case ISD::FADD:
  1960. case ISD::FSUB:
  1961. case ISD::FMUL:
  1962. case ISD::FDIV:
  1963. case ISD::FREM: {
  1964. APInt UndefRHS, ZeroRHS;
  1965. if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
  1966. ZeroRHS, TLO, Depth + 1))
  1967. return true;
  1968. APInt UndefLHS, ZeroLHS;
  1969. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
  1970. ZeroLHS, TLO, Depth + 1))
  1971. return true;
  1972. KnownZero = ZeroLHS & ZeroRHS;
  1973. KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
  1974. break;
  1975. }
  1976. case ISD::AND: {
  1977. APInt SrcUndef, SrcZero;
  1978. if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
  1979. SrcZero, TLO, Depth + 1))
  1980. return true;
  1981. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
  1982. KnownZero, TLO, Depth + 1))
  1983. return true;
  1984. // If either side has a zero element, then the result element is zero, even
  1985. // if the other is an UNDEF.
  1986. // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
  1987. // and then handle 'and' nodes with the rest of the binop opcodes.
  1988. KnownZero |= SrcZero;
  1989. KnownUndef &= SrcUndef;
  1990. KnownUndef &= ~KnownZero;
  1991. break;
  1992. }
  1993. case ISD::TRUNCATE:
  1994. case ISD::SIGN_EXTEND:
  1995. case ISD::ZERO_EXTEND:
  1996. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
  1997. KnownZero, TLO, Depth + 1))
  1998. return true;
  1999. if (Op.getOpcode() == ISD::ZERO_EXTEND) {
  2000. // zext(undef) upper bits are guaranteed to be zero.
  2001. if (DemandedElts.isSubsetOf(KnownUndef))
  2002. return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
  2003. KnownUndef.clearAllBits();
  2004. }
  2005. break;
  2006. default: {
  2007. if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
  2008. if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
  2009. KnownZero, TLO, Depth))
  2010. return true;
  2011. } else {
  2012. KnownBits Known;
  2013. APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
  2014. if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO,
  2015. Depth, AssumeSingleUse))
  2016. return true;
  2017. }
  2018. break;
  2019. }
  2020. }
  2021. assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
  2022. // Constant fold all undef cases.
  2023. // TODO: Handle zero cases as well.
  2024. if (DemandedElts.isSubsetOf(KnownUndef))
  2025. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  2026. return false;
  2027. }
  2028. /// Determine which of the bits specified in Mask are known to be either zero or
  2029. /// one and return them in the Known.
  2030. void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
  2031. KnownBits &Known,
  2032. const APInt &DemandedElts,
  2033. const SelectionDAG &DAG,
  2034. unsigned Depth) const {
  2035. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2036. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2037. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2038. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2039. "Should use MaskedValueIsZero if you don't know whether Op"
  2040. " is a target node!");
  2041. Known.resetAll();
  2042. }

void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
                                                   KnownBits &Known,
                                                   const APInt &DemandedElts,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth) const {
  assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");

  if (unsigned Align = DAG.InferPtrAlignment(Op)) {
    // The low bits are known zero if the pointer is aligned.
    Known.Zero.setLowBits(Log2_32(Align));
  }
}
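
// For instance (illustrative values): a frame index known to be 16-byte
// aligned yields Log2_32(16) == 4, so Known.Zero gets its low 4 bits set and
// users of the pointer may rely on the low nibble being zero.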

/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                         const APInt &,
                                                         const SelectionDAG &,
                                                         unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use ComputeNumSignBits if you don't know whether Op"
         " is a target node!");
  return 1;
}

bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
         " is a target node!");
  return false;
}

bool TargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedBits if you don't know whether Op"
         " is a target node!");
  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
  return false;
}

const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}

bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  bool SNaN,
                                                  unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isKnownNeverNaN if you don't know whether Op"
         " is a target node!");
  return false;
}

// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
  if (!N)
    return false;

  APInt CVal;
  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
    CVal = CN->getAPIntValue();
  } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
    auto *CN = BV->getConstantSplatNode();
    if (!CN)
      return false;

    // If this is a truncating build vector, truncate the splat value.
    // Otherwise, we may fail to match the expected values below.
    unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
    CVal = CN->getAPIntValue();
    if (BVEltWidth < CVal.getBitWidth())
      CVal = CVal.trunc(BVEltWidth);
  } else {
    return false;
  }

  switch (getBooleanContents(N->getValueType(0))) {
  case UndefinedBooleanContent:
    return CVal[0];
  case ZeroOrOneBooleanContent:
    return CVal.isOneValue();
  case ZeroOrNegativeOneBooleanContent:
    return CVal.isAllOnesValue();
  }

  llvm_unreachable("Invalid boolean contents");
}
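
// A quick illustration of the switch above (values hypothetical): with
// ZeroOrOneBooleanContent a splat build_vector of i32 1 is "true", with
// ZeroOrNegativeOneBooleanContent it must be a splat of -1, and with
// UndefinedBooleanContent only bit 0 of the splat value is consulted.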

bool TargetLowering::isConstFalseVal(const SDNode *N) const {
  if (!N)
    return false;

  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) {
    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
    if (!BV)
      return false;

    // Only interested in constant splats; we don't care about undef elements
    // when identifying boolean constants, and getConstantSplatNode returns
    // NULL if all ops are undef.
    CN = BV->getConstantSplatNode();
    if (!CN)
      return false;
  }

  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
    return !CN->getAPIntValue()[0];

  return CN->isNullValue();
}

bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
                                       bool SExt) const {
  if (VT == MVT::i1)
    return N->isOne();

  TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
  switch (Cnt) {
  case TargetLowering::ZeroOrOneBooleanContent:
    // An extended value of 1 is always true, unless its original type is i1,
    // in which case it will be sign extended to -1.
    return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
  case TargetLowering::UndefinedBooleanContent:
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    return N->isAllOnesValue() && SExt;
  }
  llvm_unreachable("Unexpected enumeration.");
}

/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  EVT OpVT = N0.getValueType();
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SelectionDAG &DAG = DCI.DAG;
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isNullValue())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
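
// Worked example of the two paths above (illustrative constants): with a
// power-of-two Y the equality flips against zero,
//   (setcc (and X, 8), 8, seteq)  -->  (setcc (and X, 8), 0, setne)
// and when hasAndNotCompare() holds, the value side is complemented instead,
//   (setcc (and X, Y), Y, seteq)  -->  (setcc (and (not X), Y), 0, seteq)
// since (X & Y) == Y is exactly the condition that Y has no bit outside X.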

/// There are multiple IR patterns that could be checking whether a certain
/// truncation of a signed number would be lossy or not. The pattern that is
/// best at the IR level may not lower optimally, so we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
/// KeptBits also can't be 1; that would have been folded to  %x dstcond 0.
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// where C = bitwidth(x) - KeptBits and C u< bitwidth(x).
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
  return T2;
}
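
// Walking through the i16 example from the comments above (the numbers are
// illustrative): KeptBits = log2(256) = 8 and MaskedBits = 16 - 8 = 8, so
//   icmp ult i16 (add i16 %x, 128), 256
//     -->  setcc (sra (shl %x, 8), 8), %x, seteq
// i.e. "%x survives truncation to i8" is re-expressed as equality with the
// trunc+sext round trip of %x.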

/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // The shift would not be valid if the operands are boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
                                 !DCI.isBeforeLegalize());
  SDValue One = DAG.getConstant(1, DL, ShiftVT);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
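
// A sketch of why the last fold is safe (reasoning not in the original
// comments): in fixed-width arithmetic X - Y == Y holds iff X == 2*Y, and
// 2*Y is cheapest as Y << 1; for i1 a shift amount of 1 is not representable,
// which is why that case bails out above.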

/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                      ISD::CondCode Cond, bool foldBooleans,
                                      DAGCombinerInfo &DCI,
                                      const SDLoc &dl) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();

  // Constant fold or commute setcc.
  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
    return Fold;

  // Ensure that the constant occurs on the RHS and fold constant comparisons.
  // TODO: Handle non-splat vector constants. All undef causes trouble.
  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
  if (isConstOrConstSplat(N0) &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
    // equality comparison, then we're just comparing whether X itself is
    // zero.
    if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
        N0.getOperand(1).getOpcode() == ISD::Constant) {
      const APInt &ShAmt = N0.getConstantOperandAPInt(1);
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          ShAmt == Log2_32(N0.getValueSizeInBits())) {
        if ((C1 == 0) == (Cond == ISD::SETEQ)) {
          // (srl (ctlz x), 5) == 0  -> X != 0
          // (srl (ctlz x), 5) != 1  -> X != 0
          Cond = ISD::SETNE;
        } else {
          // (srl (ctlz x), 5) != 0  -> X == 0
          // (srl (ctlz x), 5) == 1  -> X == 0
          Cond = ISD::SETEQ;
        }
        SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
        return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
                            Zero, Cond);
      }
    }

    SDValue CTPOP = N0;
    // Look through truncs that don't change the value of a ctpop.
    if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
      CTPOP = N0.getOperand(0);

    if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
        (N0 == CTPOP ||
         N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
      EVT CTVT = CTPOP.getValueType();
      SDValue CTOp = CTPOP.getOperand(0);

      // (ctpop x) u< 2 -> (x & x-1) == 0
      // (ctpop x) u> 1 -> (x & x-1) != 0
      if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)) {
        SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
                                  DAG.getConstant(1, dl, CTVT));
        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
        ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
        return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
      }

      // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
    }
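
    // Why the x & (x - 1) trick above works (illustrative walk-through):
    // clearing the lowest set bit yields zero iff x had at most one bit set,
    // so e.g. for x = 8: (ctpop 8) u< 2  -->  (8 & 7) == 0  -->  true.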

    // (zext x) == C --> x == (trunc C)
    // (sext x) == C --> x == (trunc C)
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        DCI.isBeforeLegalize() && N0->hasOneUse()) {
      unsigned MinBits = N0.getValueSizeInBits();
      SDValue PreExt;
      bool Signed = false;
      if (N0->getOpcode() == ISD::ZERO_EXTEND) {
        // ZExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
      } else if (N0->getOpcode() == ISD::AND) {
        // DAGCombine turns costly ZExts into ANDs
        if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
          if ((C->getAPIntValue()+1).isPowerOf2()) {
            MinBits = C->getAPIntValue().countTrailingOnes();
            PreExt = N0->getOperand(0);
          }
      } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
        // SExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
        Signed = true;
      } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
        // ZEXTLOAD / SEXTLOAD
        if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
          Signed = true;
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        }
      }

      // Figure out how many bits we need to preserve this constant.
      unsigned ReqdBits = Signed ?
        C1.getBitWidth() - C1.getNumSignBits() + 1 :
        C1.getActiveBits();

      // Make sure we're not losing bits from the constant.
      if (MinBits > 0 &&
          MinBits < C1.getBitWidth() &&
          MinBits >= ReqdBits) {
        EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
        if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
          // Will get folded away.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
          if (MinBits == 1 && C1 == 1)
            // Invert the condition.
            return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
          SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
          return DAG.getSetCC(dl, VT, Trunc, C, Cond);
        }

        // If truncating the setcc operands is not desirable, we can still
        // simplify the expression in some cases:
        // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
        // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, setne)    -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, seteq)    -> setcc (x, y, cc)
        // setcc (sext (setcc x, y, cc)), -1, setne)   -> setcc (x, y, inv(cc))
        // setcc (sext (setcc x, y, cc)), -1, seteq)   -> setcc (x, y, cc)
        SDValue TopSetCC = N0->getOperand(0);
        unsigned N0Opc = N0->getOpcode();
        bool SExt = (N0Opc == ISD::SIGN_EXTEND);
        if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
            TopSetCC.getOpcode() == ISD::SETCC &&
            (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
            (isConstFalseVal(N1C) ||
             isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {

          bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
                         (!N1C->isNullValue() && Cond == ISD::SETNE);

          if (!Inverse)
            return TopSetCC;

          ISD::CondCode InvCond = ISD::getSetCCInverse(
              cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
              TopSetCC.getOperand(0).getValueType().isInteger());
          return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
                              TopSetCC.getOperand(1),
                              InvCond);
        }
      }
    }
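
    // E.g. (hypothetical i8 source): setcc (zext i8 %x to i32), 5, seteq
    // narrows to setcc %x, i8 5, seteq, since the 3 active bits of the
    // constant fit in the 8 pre-extension bits.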

    // If the LHS is '(and load, const)', the RHS is 0, the test is for
    // equality or unsigned, and all 1 bits of the const are in the same
    // partial word, see if we can shorten the load.
    if (DCI.isBeforeLegalize() &&
        !ISD::isSignedIntSetCC(Cond) &&
        N0.getOpcode() == ISD::AND && C1 == 0 &&
        N0.getNode()->hasOneUse() &&
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(0).getNode()->hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
      APInt bestMask;
      unsigned bestWidth = 0, bestOffset = 0;
      if (!Lod->isVolatile() && Lod->isUnindexed()) {
        unsigned origWidth = N0.getValueSizeInBits();
        unsigned maskWidth = origWidth;
        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
        // 8 bits, but have to be careful...
        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
          origWidth = Lod->getMemoryVT().getSizeInBits();
        const APInt &Mask = N0.getConstantOperandAPInt(1);
        for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
          for (unsigned offset = 0; offset < origWidth / width; offset++) {
            if (Mask.isSubsetOf(newMask)) {
              if (DAG.getDataLayout().isLittleEndian())
                bestOffset = (uint64_t)offset * (width/8);
              else
                bestOffset = (origWidth/width - offset - 1) * (width/8);
              bestMask = Mask.lshr(offset * (width/8) * 8);
              bestWidth = width;
              break;
            }
            newMask <<= width;
          }
        }
      }
      if (bestWidth) {
        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
        if (newVT.isRound() &&
            shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
          EVT PtrType = Lod->getOperand(1).getValueType();
          SDValue Ptr = Lod->getBasePtr();
          if (bestOffset != 0)
            Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
                              DAG.getConstant(bestOffset, dl, PtrType));
          unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
          SDValue NewLoad = DAG.getLoad(
              newVT, dl, Lod->getChain(), Ptr,
              Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
          return DAG.getSetCC(dl, VT,
                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                                          DAG.getConstant(bestMask.trunc(bestWidth),
                                                          dl, newVT)),
                              DAG.getConstant(0LL, dl, newVT), Cond);
        }
      }
    }
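
    // Sketch of the narrowing above (little-endian, hypothetical constants):
    // for (and (load i32 p), 0x00FF0000) == 0 the set mask bits live entirely
    // in byte 2, so the search settles on bestWidth = 8, bestOffset = 2, and
    // the test becomes (and (load i8 p+2), 0xFF) == 0.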

    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
      unsigned InSize = N0.getOperand(0).getValueSizeInBits();

      // If the comparison constant has bits in the upper part, the
      // zero-extended value could never match.
      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
                                              C1.getBitWidth() - InSize))) {
        switch (Cond) {
        case ISD::SETUGT:
        case ISD::SETUGE:
        case ISD::SETEQ:
          return DAG.getConstant(0, dl, VT);
        case ISD::SETULT:
        case ISD::SETULE:
        case ISD::SETNE:
          return DAG.getConstant(1, dl, VT);
        case ISD::SETGT:
        case ISD::SETGE:
          // True if the sign bit of C1 is set.
          return DAG.getConstant(C1.isNegative(), dl, VT);
        case ISD::SETLT:
        case ISD::SETLE:
          // True if the sign bit of C1 isn't set.
          return DAG.getConstant(C1.isNonNegative(), dl, VT);
        default:
          break;
        }
      }

      // Otherwise, we can perform the comparison with the low bits.
      switch (Cond) {
      case ISD::SETEQ:
      case ISD::SETNE:
      case ISD::SETUGT:
      case ISD::SETUGE:
      case ISD::SETULT:
      case ISD::SETULE: {
        EVT newVT = N0.getOperand(0).getValueType();
        if (DCI.isBeforeLegalizeOps() ||
            (isOperationLegal(ISD::SETCC, newVT) &&
             isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
          EVT NewSetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
          SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);

          SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
                                          NewConst, Cond);
          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
        }
        break;
      }
      default:
        break; // TODO: be more careful with signed comparisons
      }
    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
      EVT ExtDstTy = N0.getValueType();
      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();

      // If the constant doesn't fit into the number of bits for the source of
      // the sign extension, it is impossible for both sides to be equal.
      if (C1.getMinSignedBits() > ExtSrcTyBits)
        return DAG.getConstant(Cond == ISD::SETNE, dl, VT);

      SDValue ZextOp;
      EVT Op0Ty = N0.getOperand(0).getValueType();
      if (Op0Ty == ExtSrcTy) {
        ZextOp = N0.getOperand(0);
      } else {
        APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
        ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
                             DAG.getConstant(Imm, dl, Op0Ty));
      }
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(ZextOp.getNode());
      // Otherwise, make this a use of a zext.
      return DAG.getSetCC(dl, VT, ZextOp,
                          DAG.getConstant(C1 & APInt::getLowBitsSet(
                                                   ExtDstTyBits,
                                                   ExtSrcTyBits),
                                          dl, ExtDstTy),
                          Cond);
    } else if ((N1C->isNullValue() || N1C->isOne()) &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
      if (N0.getOpcode() == ISD::SETCC &&
          isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
        if (TrueWhenTrue)
          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
        // Invert the condition.
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        CC = ISD::getSetCCInverse(CC,
                                  N0.getOperand(0).getValueType().isInteger());
        if (DCI.isBeforeLegalizeOps() ||
            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
      }

      if ((N0.getOpcode() == ISD::XOR ||
           (N0.getOpcode() == ISD::AND &&
            N0.getOperand(0).getOpcode() == ISD::XOR &&
            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
          isa<ConstantSDNode>(N0.getOperand(1)) &&
          cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
        // can only do this if the top bits are known zero.
        unsigned BitWidth = N0.getValueSizeInBits();
        if (DAG.MaskedValueIsZero(N0,
                                  APInt::getHighBitsSet(BitWidth,
                                                        BitWidth-1))) {
          // Okay, get the un-inverted input value.
          SDValue Val;
          if (N0.getOpcode() == ISD::XOR) {
            Val = N0.getOperand(0);
          } else {
            assert(N0.getOpcode() == ISD::AND &&
                   N0.getOperand(0).getOpcode() == ISD::XOR);
            // ((X^1)&1)^1  ->  X & 1
            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
                              N0.getOperand(0).getOperand(0),
                              N0.getOperand(1));
          }

          return DAG.getSetCC(dl, VT, Val, N1,
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
      } else if (N1C->isOne() &&
                 (VT == MVT::i1 ||
                  getBooleanContents(N0->getValueType(0)) ==
                      ZeroOrOneBooleanContent)) {
        SDValue Op0 = N0;
        if (Op0.getOpcode() == ISD::TRUNCATE)
          Op0 = Op0.getOperand(0);

        if ((Op0.getOpcode() == ISD::XOR) &&
            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
          // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
          Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
          return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
                              Cond);
        }

        if (Op0.getOpcode() == ISD::AND &&
            isa<ConstantSDNode>(Op0.getOperand(1)) &&
            cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
          if (Op0.getValueType().bitsGT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));
          else if (Op0.getValueType().bitsLT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));

          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }

        if (Op0.getOpcode() == ISD::AssertZext &&
            cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
      }
    }

    if (SDValue V =
            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
      return V;
  }

  // These simplifications apply to splat vectors as well.
  // TODO: Handle more splat vector cases.
  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    APInt MinVal, MaxVal;
    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
    if (ISD::isSignedIntSetCC(Cond)) {
      MinVal = APInt::getSignedMinValue(OperandBitSize);
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
    } else {
      MinVal = APInt::getMinValue(OperandBitSize);
      MaxVal = APInt::getMaxValue(OperandBitSize);
    }

    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
      // X >= MIN --> true
      if (C1 == MinVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      if (!VT.isVector()) { // TODO: Support this for vectors.
        // X >= C0 --> X > (C0 - 1)
        APInt C = C1 - 1;
        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
      // X <= MAX --> true
      if (C1 == MaxVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      // X <= C0 --> X < (C0 + 1)
      if (!VT.isVector()) { // TODO: Support this for vectors.
        APInt C = C1 + 1;
        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
      if (C1 == MinVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setlt X, Max --> setne X, Max
        if (C1 == MaxVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setult X, 1, turn it into seteq X, 0
        if (C1 == MinVal+1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MinVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
      if (C1 == MaxVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setgt X, Min --> setne X, Min
        if (C1 == MinVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setugt X, Max-1, turn it into seteq X, Max
        if (C1 == MaxVal-1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MaxVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    // If we have "setcc X, C0", check to see if we can shrink the immediate
    // by changing cc.
    // TODO: Support this for vectors after legalize ops.
    if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
      // SETUGT X, SINTMAX  -> SETLT X, 0
      if (Cond == ISD::SETUGT &&
          C1 == APInt::getSignedMaxValue(OperandBitSize))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getConstant(0, dl, N1.getValueType()),
                            ISD::SETLT);

      // SETULT X, SINTMIN  -> SETGT X, -1
      if (Cond == ISD::SETULT &&
          C1 == APInt::getSignedMinValue(OperandBitSize)) {
        SDValue ConstMinusOne =
            DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
                            N1.getValueType());
        return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
      }
    }
  }
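
  // E.g. (values illustrative): "setuge X, 16" canonicalizes to
  // "setugt X, 15", while the boundary cases fold outright: "setuge X, 0" is
  // always true and "setugt X, UINT_MAX" is always false.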

  // Back to non-vector simplifications.
  // TODO: Can we do these for vector splats?
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // Fold bit comparisons when we can.
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        (VT == N0.getValueType() ||
         (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
        N0.getOpcode() == ISD::AND) {
      auto &DL = DAG.getDataLayout();
      if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
                                       !DCI.isBeforeLegalize());
        if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0  -->  (X & 8) >> 3
          // Perform the xform if the AND RHS is a single bit.
          if (AndRHS->getAPIntValue().isPowerOf2()) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                               DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
                                           DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
                                                           ShiftTy)));
          }
        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
          // (X & 8) == 8  -->  (X & 8) >> 3
          // Perform the xform if C1 is a single bit.
          if (C1.isPowerOf2()) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                               DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
                                           DAG.getConstant(C1.logBase2(), dl,
                                                           ShiftTy)));
          }
        }
      }
    }

    if (C1.getMinSignedBits() <= 64 &&
        !isLegalICmpImmediate(C1.getSExtValue())) {
      // (X & -256) == 256 -> (X >> 8) == 1
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
        if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          const APInt &AndRHSC = AndRHS->getAPIntValue();
          if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
            unsigned ShiftBits = AndRHSC.countTrailingZeros();
            auto &DL = DAG.getDataLayout();
            EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
                                           !DCI.isBeforeLegalize());
            EVT CmpTy = N0.getValueType();
            SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
                                        DAG.getConstant(ShiftBits, dl,
                                                        ShiftTy));
            SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
            return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
          }
        }
      } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
                 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
        bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);

        // X <  0x100000000 -> (X >> 32) <  1
        // X >= 0x100000000 -> (X >> 32) >= 1
        // X <= 0x0ffffffff -> (X >> 32) <  1
        // X >  0x0ffffffff -> (X >> 32) >= 1
        unsigned ShiftBits;
        APInt NewC = C1;
        ISD::CondCode NewCond = Cond;
        if (AdjOne) {
          ShiftBits = C1.countTrailingOnes();
          NewC = NewC + 1;
          NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
        } else {
          ShiftBits = C1.countTrailingZeros();
        }
        NewC.lshrInPlace(ShiftBits);
        if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
            isLegalICmpImmediate(NewC.getSExtValue())) {
          auto &DL = DAG.getDataLayout();
          EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
                                         !DCI.isBeforeLegalize());
          EVT CmpTy = N0.getValueType();
          SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
                                      DAG.getConstant(ShiftBits, dl, ShiftTy));
          SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
          return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
        }
      }
    }
  }
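
  // Walking through "(X & -256) == 256" from the comment above (values
  // illustrative): AndRHSC = -256 has 8 trailing zeros, so ShiftBits = 8 and
  // the compare becomes (X >> 8) == 1, avoiding the out-of-range immediate.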

  if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
    auto *CFP = cast<ConstantFPSDNode>(N1);
    assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");

    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
    // constant if knowing that the operand is non-NaN is enough.  We prefer to
    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
    // materialize 0.0.
    if (Cond == ISD::SETO || Cond == ISD::SETUO)
      return DAG.getSetCC(dl, VT, N0, N0, Cond);

    // setcc (fneg x), C -> setcc swap(pred) x, -C
    if (N0.getOpcode() == ISD::FNEG) {
      ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
        SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
        return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
      }
    }

    // If the condition is not legal, see if we can find an equivalent one
    // which is legal.
    if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
      // If the comparison was an awkward floating-point == or != and one of
      // the comparison operands is infinity or negative infinity, convert the
      // condition to a less-awkward <= or >=.
      if (CFP->getValueAPF().isInfinity()) {
        if (CFP->getValueAPF().isNegative()) {
          if (Cond == ISD::SETOEQ &&
              isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
          if (Cond == ISD::SETUEQ &&
              isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
          if (Cond == ISD::SETUNE &&
              isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
          if (Cond == ISD::SETONE &&
              isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
        } else {
          if (Cond == ISD::SETOEQ &&
              isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
          if (Cond == ISD::SETUEQ &&
              isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
          if (Cond == ISD::SETUNE &&
              isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
          if (Cond == ISD::SETONE &&
              isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
        }
      }
    }
  }

  if (N0 == N1) {
    // The sext(setcc()) => setcc() optimization relies on the appropriate
    // constant being emitted.
    assert(!N0.getValueType().isInteger() &&
           "Integer types should be handled by FoldSetCC");

    bool EqTrue = ISD::isTrueWhenEqual(Cond);
    unsigned UOF = ISD::getUnorderedFlavor(Cond);
    if (UOF == 2) // FP operators that are undefined on NaNs.
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    if (UOF == unsigned(EqTrue))
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
    // if it is not already.
    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
    if (NewCond != Cond &&
        (DCI.isBeforeLegalizeOps() ||
         isCondCodeLegal(NewCond, N0.getSimpleValueType())))
      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
  }

  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
      N0.getValueType().isInteger()) {
    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
        N0.getOpcode() == ISD::XOR) {
      // Simplify (X+Y) == (X+Z) -->  Y == Z
      if (N0.getOpcode() == N1.getOpcode()) {
        if (N0.getOperand(0) == N1.getOperand(0))
          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
        if (N0.getOperand(1) == N1.getOperand(1))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
        if (isCommutativeBinOp(N0.getOpcode())) {
          // If X op Y == Y op X, try other combinations.
          if (N0.getOperand(0) == N1.getOperand(1))
            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
                                Cond);
          if (N0.getOperand(1) == N1.getOperand(0))
            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
                                Cond);
        }
      }

      // If RHS is a legal immediate value for a compare instruction, we need
      // to be careful about increasing register pressure needlessly.
      bool LegalRHSImm = false;

      if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
        if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          // Turn (X+C1) == C2 --> X == C2-C1
          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
            return DAG.getSetCC(dl, VT, N0.getOperand(0),
                                DAG.getConstant(RHSC->getAPIntValue()-
                                                LHSR->getAPIntValue(),
                                                dl, N0.getValueType()), Cond);
          }

          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
          if (N0.getOpcode() == ISD::XOR)
            // If we know that all of the inverted bits are zero, don't bother
            // performing the inversion.
            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
              return
                  DAG.getSetCC(dl, VT, N0.getOperand(0),
                               DAG.getConstant(LHSR->getAPIntValue() ^
                                               RHSC->getAPIntValue(),
                                               dl, N0.getValueType()),
                               Cond);
        }

        // Turn (C1-X) == C2 --> X == C1-C2
        if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
            return
                DAG.getSetCC(dl, VT, N0.getOperand(1),
                             DAG.getConstant(SUBC->getAPIntValue() -
                                             RHSC->getAPIntValue(),
                                             dl, N0.getValueType()),
                             Cond);
          }
        }

        // Could RHSC fold directly into a compare?
        if (RHSC->getValueType(0).getSizeInBits() <= 64)
          LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
      }

      // (X+Y) == X --> Y == 0 and similar folds.
      // Don't do this if X is an immediate that can fold into a cmp
      // instruction and X+Y has other uses. It could be an induction variable
      // chain, and the transform would increase register pressure.
      if (!LegalRHSImm || N0.hasOneUse())
        if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
          return V;
    }

    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
        N1.getOpcode() == ISD::XOR)
      if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
        return V;

    if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
      return V;
  }

  // Fold away ALL boolean setcc's.
  if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
    SDValue Temp;
    switch (Cond) {
    default: llvm_unreachable("Unknown integer setcc!");
    case ISD::SETEQ:  // X == Y  ->  ~(X^Y)
      Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      N0 = DAG.getNOT(dl, Temp, OpVT);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETNE:  // X != Y  -->  (X^Y)
      N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      break;
    case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
    case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETLT:  // X <s Y  -->  X == 1 & Y == 0  -->  ~Y & X
    case ISD::SETUGT: // X >u Y  -->  X == 1 & Y == 0  -->  ~Y & X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETULE: // X <=u Y  -->  X == 0 | Y == 1  -->  ~X | Y
    case ISD::SETGE:  // X >=s Y  -->  X == 0 | Y == 1  -->  ~X | Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETUGE: // X >=u Y  -->  X == 1 | Y == 0  -->  ~Y | X
    case ISD::SETLE:  // X <=s Y  -->  X == 1 | Y == 0  -->  ~Y | X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
      break;
    }
    if (VT.getScalarType() != MVT::i1) {
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(N0.getNode());
      // FIXME: If running after legalize, we probably can't do this.
      ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
      N0 = DAG.getNode(ExtendCode, dl, VT, N0);
    }
    return N0;
  }

  // Could not fold it.
  return SDValue();
}

/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
                                    int64_t &Offset) const {
  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}
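
// E.g. (illustrative DAG): for (add (GlobalAddress @g, 8), 4) this reports
// GA = @g and accumulates Offset += 12 across the recursive calls.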

SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
  unsigned S = Constraint.size();

  if (S == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r': return C_RegisterClass;
    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;
    case 'i': // Simple Integer or Relocatable Constant
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
    case 's': // Relocatable Constant
    case 'p': // Address.
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
      return C_Memory;
    return C_Register;
  }
  return C_Unknown;
}
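
// For instance (constraint strings illustrative): "r" maps to
// C_RegisterClass, "m" to C_Memory, a brace-enclosed name such as "{eax}" to
// C_Register, and "{memory}" is special-cased back to C_Memory.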

/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  if (ConstraintVT.isInteger())
    return "r";
  if (ConstraintVT.isFloatingPoint())
    return "f"; // works for many targets
  return nullptr;
}

SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
    SelectionDAG &DAG) const {
  return SDValue();
}

/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':    // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock ||
        Op.getOpcode() == ISD::TargetBlockAddress) {
      Ops.push_back(Op);
      return;
    }
    LLVM_FALLTHROUGH;
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    GlobalAddressSDNode *GA;
    ConstantSDNode *C;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementptr is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be
    // accessible, while in this case the GA may be furthest from the root
    // node, which is likely an ISD::ADD.
    while (1) {
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0),
                                                 Offset + GA->getOffset()));
        return;
      } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
                 ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
                                      : ISD::SIGN_EXTEND;
        int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
                                                    : C->getSExtValue();
        Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
                                            SDLoc(C), MVT::i64));
        return;
      } else {
        const unsigned OpCode = Op.getOpcode();
        if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
          if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
            Op = Op.getOperand(1);
          // Subtraction is not commutative.
          else if (OpCode == ISD::ADD &&
                   (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
            Op = Op.getOperand(0);
          else
            return;
          Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
          continue;
        }
      }
      return;
    }
    break;
  }
  }
}
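
// E.g. (illustrative symbol and offsets): for an "i" operand whose DAG is
//   (add (add (GlobalAddress @sym, 0), 16), 8)
// the loop peels the two constants into Offset (24) and finally emits a
// single TargetGlobalAddress @sym+24.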

std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
                                             StringRef Constraint,
                                             MVT VT) const {
  if (Constraint.empty() || Constraint[0] != '{')
    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);

  std::pair<unsigned, const TargetRegisterClass *> R =
      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));

  // Figure out which register class contains this reg.
  for (const TargetRegisterClass *RC : RI->regclasses()) {
    // If none of the value types for this register class are valid, we
    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
    if (!isLegalRC(*RI, *RC))
      continue;

    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
         I != E; ++I) {
      if (RegName.equals_lower(RI->getRegAsmName(*I))) {
        std::pair<unsigned, const TargetRegisterClass *> S =
            std::make_pair(*I, RC);

        // If this register class has the requested value type, return it,
        // otherwise keep searching and return the first class found
        // if no other is found which explicitly has the requested type.
        if (RI->isTypeLegalForClass(*RC, VT))
          return S;

        if (!R.second)
          R = S;
      }
    }
  }

  return R;
}

//===----------------------------------------------------------------------===//
// Constraint Selection.

/// Return true if this is an input operand that is a matching constraint
/// like "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}

/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return atoi(ConstraintCode.c_str());
}
  3303. /// Split up the constraint string from the inline assembly value into the
  3304. /// specific constraints and their prefixes, and also tie in the associated
  3305. /// operand values.
  3306. /// If this returns an empty vector, and if the constraint string itself
  3307. /// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 ImmutableCallSite CS) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
    }
  }
  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
           cIndex != eIndex; ++cIndex) {
        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }
  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  switch (CT) {
  case TargetLowering::C_Other:
  case TargetLowering::C_Unknown:
    return 0;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Memory:
    return 3;
  }
  llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getMultipleConstraintMatchWeight(
    AsmOperandInfo &info, int maIndex) const {
  InlineAsm::ConstraintCodeVector *rCodes;
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
    ConstraintWeight weight =
        getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit integral immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
            // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory. Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it. This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present. This prefers
///     'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
        TLI.getConstraintType(OpInfo.Codes[i]);

    // If this is an 'other' constraint, see if the operand is valid for it.
    // For example, on X86 we might have an 'rI' constraint. If the operand
    // is an integer in the range [0..31] we want to use I (saving a load
    // of a register), otherwise we must use 'r'.
    if (CType == TargetLowering::C_Other && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i], ResultOps, *DAG);
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation. This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ChooseConstraint(OpInfo, *this, Op, DAG);
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Labels and constants are handled elsewhere ('X' is the only thing
    // that matches labels). For Functions, the type here is the type of
    // the result, which is not what we want to look at; leave them alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
      OpInfo.CallOperandVal = v;
      return;
    }

    if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
      return;

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
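    // Each iteration doubles the number of correct low-order bits, so for an
    // odd divisor this converges in O(log bitwidth) steps. For example, the
    // 32-bit inverse of 3 is 0xAAAAAAAB, since 3 * 0xAAAAAAAB = 2^33 + 1,
    // which is 1 modulo 2^32.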
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (VT.isVector()) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else {
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                      SelectionDAG &DAG,
                                      SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV
  return SDValue();
}

/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };
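  // Worked example (well-known values from Hacker's Delight): for a signed
  // i32 divide by 7, Divisor.magic() yields m = 0x92492493 and s = 2. Since
  // d > 0 and m < 0, NumeratorFactor is 1, so the expansion below computes
  //   q = mulhs(n, 0x92492493) + n;  q = sra(q, 2);  q += srl(q, 31);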
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (VT.isVector()) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else {
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
                          : isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
                               : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
    SDValue LoHi =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
    Q = SDValue(LoHi.getNode(), 1);
  } else
    return SDValue(); // No mulhs or equivalent.
  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  bool UseNPQ = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;
    // FIXME: We should use a narrower constant when the upper
    // bits are known to be zero.
    APInt Divisor = C->getAPIntValue();
    APInt::mu magics = Divisor.magicu();
    unsigned PreShift = 0, PostShift = 0;

    // If the divisor is even, we can avoid using the expensive fixup by
    // shifting the divided value upfront.
    if (magics.a != 0 && !Divisor[0]) {
      PreShift = Divisor.countTrailingZeros();
      // Get magic number for the shifted divisor.
      magics = Divisor.lshr(PreShift).magicu(PreShift);
      assert(magics.a == 0 && "Should use cheap fixup now");
    }

    APInt Magic = magics.m;

    bool SelNPQ;
    if (magics.a == 0 || Divisor.isOneValue()) {
      assert(magics.s < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      PostShift = magics.s;
      SelNPQ = false;
    } else {
      PostShift = magics.s - 1;
      SelNPQ = true;
    }

    PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
    MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
    NPQFactors.push_back(
        DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
                               : APInt::getNullValue(EltBits),
                        dl, SVT));
    PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
    UseNPQ |= SelNPQ;
    return true;
  };
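  // Worked example (well-known values from Hacker's Delight): for an
  // unsigned i32 divide by 7, magicu() yields m = 0x24924925, s = 3, a = 1,
  // so the NPQ fixup path is taken with PostShift = 2:
  //   q = mulhu(n, 0x24924925);  npq = srl(n - q, 1);  q = srl(npq + q, 2);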
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (VT.isVector()) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else {
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
  Created.push_back(Q.getNode());

  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
                            : isOperationLegalOrCustom(ISD::MULHU, VT))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
                            : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
  Created.push_back(Q.getNode());

  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_return_address' must "
                                "be a constant integer");
    return true;
  }
  return false;
}

//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
  unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
  unsigned RHSSB = DAG.ComputeNumSignBits(RHS);

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
      RHSSB > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
    // FIXME getShiftAmountTy does not always return a sensible result when VT
    // is an illegal type, and so the type may be too small to fit the shift
    // amount. Override it with i32. The shift will have to be legalized.
    ShiftAmountTy = MVT::i32;
  }
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
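  // What follows is the schoolbook decomposition: writing the operands as
  // L = LH * 2^k + LL and R = RH * 2^k + RL (with k = InnerBitSize), the
  // product is
  //   L * R = LL*RL + (LL*RH + LH*RL) * 2^k + LH*RH * 2^2k,
  // and the partial products are accumulated with explicit carry handling.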
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
                               SelectionDAG &DAG, MulExpansionKind Kind,
                               SDValue LL, SDValue LH, SDValue RL,
                               SDValue RH) const {
  SmallVector<SDValue, 2> Result;
  bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
                           N->getOperand(0), N->getOperand(1), Result, HiLoVT,
                           DAG, Kind, LL, LH, RL, RH);
  if (Ok) {
    assert(Result.size() == 2);
    Lo = Result[0];
    Hi = Result[1];
  }
  return Ok;
}
bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
                                       SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);

  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return false;

  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
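  // For example, fshl on i8 with Z = 3 concatenates X:Y, shifts the 16-bit
  // pair left by 3 and keeps the high byte: (X << 3) | (Y >> 5).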
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  SDValue ShAmt;
  if (isPowerOf2_32(EltSizeInBits)) {
    SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
    ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
  } else {
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
  }

  SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
  SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
  SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);

  // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
  // and that is undefined. We must compare and select to avoid UB.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
  // For fshl, 0-shift returns the 1st arg (X).
  // For fshr, 0-shift returns the 2nd arg (Y).
  SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
  Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
  return true;
}
// TODO: Merge with expandFunnelShift.
bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);

  // If a rotate in the other direction is legal, use it.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (isOperationLegal(RevRot, VT)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
    Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
    return true;
  }

  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;

  // Otherwise,
  //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
  //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
  //
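  // e.g. for i32, (rotl x, 8) becomes (or (shl x, 8), (srl x, 24)). The
  // "and w-1" masks keep both shift amounts in range for a variable c, and
  // make a rotate-by-zero safe: the second shift degenerates to a shift by
  // zero rather than an undefined shift by the full bitwidth.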
  assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
         "Expecting the type bitwidth to be a power of 2");
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
  SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
  SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
  Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
                       DAG.getNode(HsOpc, DL, VT, Op0, And1));
  return true;
}
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
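  // These constants mirror the IEEE-754 single-precision layout: bit 31 is
  // the sign, bits 30..23 the exponent (bias 127), bits 22..0 the mantissa;
  // 0x00800000 below re-inserts the significand's implicit leading 1.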
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller than the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);

  bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // Val = select Sel, Src, Src - 0x8000000000000000
    // Ofs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Val) ^ Ofs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
                                DAG.getConstant(SignMask, dl, DstVT));
    Result = DAG.getNode(ISD::XOR, dl, DstVT,
                         DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False
    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64)
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  if (DstVT.getScalarType() == MVT::f32) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // For unsigned conversions, convert them to signed conversions using the
    // algorithm from the x86_64 __floatundidf in compiler_rt.
    SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);

    SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
    SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
    SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
    SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
    SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);

    SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
    SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);

    // TODO: This really should be implemented using a branch rather than a
    // select. We happen to get lucky and machinesink does the right
    // thing most of the time. This would be a good candidate for a
    // pseudo-op, or, even better, for whole-function isel.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

    SDValue SignBitTest = DAG.getSetCC(
        dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
    Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
    return true;
  }
  if (DstVT.getScalarType() == MVT::f64) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // Implementation of unsigned i64 to f64 following the algorithm in
    // __floatundidf in compiler_rt. This implementation has the advantage
    // of performing rounding correctly, both in the default rounding mode
    // and in all alternate rounding modes.
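    // The bit patterns below encode: 0x4330000000000000 is the double 2^52,
    // 0x4530000000000000 is 2^84, and 0x4530000000100000 is 2^84 + 2^52.
    // OR-ing the low/high 32-bit halves of the input into the mantissas of
    // 2^52 and 2^84 makes both halves exact doubles, and the final
    // subtract/add recombines them while rounding only once.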
    SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
    SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
        BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
    SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
    SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
    SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

    SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
    SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
    SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
    SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
    SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
    SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
    SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
    Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
    return true;
  }

  return false;
}
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
    ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
  EVT VT = Node->getValueType(0);
  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  return SDValue();
}
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
                                 SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));

  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));

  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // v = (v * 0x01010101...) >> (Len - 8)
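  // The first three steps compute per-2-bit, per-4-bit and then per-byte
  // population counts in parallel; the multiply by 0x0101... sums all the
  // byte counts into the most significant byte, which the final shift
  // extracts. E.g. for Len == 32 and v == 0xFF, the result is 8.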
  if (Len > 8)
    Op =
        DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                    DAG.getConstant(Len - 8, dl, ShVT));

  Result = Op;
  return true;
}
bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
                                SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT)) {
    Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
    return true;
  }

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
    return true;
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return false;

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
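  // The OR cascade smears the highest set bit into every position below it,
  // so ~x is left with exactly one set bit per leading zero. E.g. for i8,
  // x = 0b00010100 smears to 0b00011111, and popcount(0b11100000) == 3.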
  for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  Op = DAG.getNOT(dl, Op, VT);
  Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
  return true;
}
bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
                                SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT)) {
    Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
    return true;
  }

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
    return true;
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return false;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
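  // ~x & (x - 1) isolates the trailing zeros as a mask of ones. E.g. for
  // x = 0b101000: x - 1 = 0b100111 and ~x = ...010111, so the AND yields
  // 0b000111, whose popcount is cttz(x) == 3.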
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    Result =
        DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                    DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
    return true;
  }

  Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
  return true;
}

bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
                               SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = N->getOperand(0);

  // Only expand vector types if we have the appropriate vector operations.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
                        !isOperationLegalOrCustom(ISD::ADD, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return false;
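  // abs(x) = (x + sign) ^ sign, where sign = x >> (BitWidth - 1) is all-ones
  // for negative x and zero otherwise. Illustrative example (not from the
  // source) on i8: x = -5 = 0xFB gives sign = 0xFF, x + sign = 0xFA, and
  // 0xFA ^ 0xFF = 0x05 = 5; for non-negative x both steps are no-ops.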
  SDValue Shift =
      DAG.getNode(ISD::SRA, dl, VT, Op,
                  DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
  SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
  Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
  return true;
}

SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                            SelectionDAG &DAG) const {
  SDLoc SL(LD);

  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  ISD::LoadExtType ExtType = LD->getExtensionType();

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = LD->getValueType(0).getScalarType();

  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);

  return DAG.getMergeValues({Value, NewChain}, SL);
}

SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // The type of the data we want to save.
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depends on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
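  // Illustrative example (not from the source): storing a v4i1 <1,0,1,1> on a
  // little-endian target packs element Idx into bit Idx of an i4, producing
  // 0b1101; on a big-endian target the element order is reversed.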
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getConstant(Idx, SL, IdxVT));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getAlignment(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }
  // Store stride in bytes.
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");

  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getConstant(Idx, SL, IdxVT));

    SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
        ST->getMemOperand()->getFlags(), ST->getAAInfo());

    Stores.push_back(Store);
  }

  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}

std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
        if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
          return std::make_pair(Scalarized.getOperand(0),
                                Scalarized.getOperand(1));
        return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to an (aligned) stack slot using (unaligned) integer
    // loads and stores, then do an (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
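    // Illustrative example (not from the source): a 10-byte load copied with
    // 4-byte registers gives NumRegs = 3 -- two full-width copies in the loop
    // below, then one 16-bit tail copy via the extending load after it.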
    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one of the copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          MinAlign(LD->getAlignment(), Offset),
          LD->getMemOperand()->getFlags(), LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }
    // The last copy may be partial. Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    // Follow the load with a store to the stack slot. Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.
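  // Illustrative example (not from the source): an i32 load that is only
  // 2-byte aligned becomes two 16-bit loads; on little-endian the result is
  // reassembled below as (Hi << 16) | zext(Lo).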
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo(), NewLoadedVT, Alignment,
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment,
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  }

  // Aggregate the two parts.
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  return std::make_pair(Result, TF);
}

SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  int Alignment = ST->getAlignment();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do an (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one of the copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location. Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    MinAlign(ST->getAlignment(), Offset),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }
    // The last store may be partial. Do a truncating store. On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset),
                          LoadMemVT, MinAlign(ST->getAlignment(), Offset),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT.
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  int NumBits = NewStoredVT.getSizeInBits();
  int IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
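  // Illustrative example (not from the source): an unaligned i32 store
  // becomes two i16 halves, Lo = Val and Hi = Val >> 16; little-endian
  // writes Lo at Ptr and Hi at Ptr + 2, big-endian the reverse.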
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
  Alignment = MinAlign(Alignment, IncrementSize);
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT,
      Alignment, ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}

SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
                                       const SDLoc &DL, EVT DataVT,
                                       SelectionDAG &DAG,
                                       bool IsCompressedMemory) const {
  SDValue Increment;
  EVT AddrVT = Addr.getValueType();
  EVT MaskVT = Mask.getValueType();
  assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    // Increment the pointer according to the number of '1's in the mask.
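    // Illustrative example (not from the source): a compressed store of
    // 4 x i32 under mask 0b1011 writes three elements, so the address
    // advances by popcount(mask) * 4 = 12 bytes rather than 16.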
    EVT MaskIntVT =
        EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
    SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
    if (MaskIntVT.getSizeInBits() < 32) {
      MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
      MaskIntVT = MVT::i32;
    }

    // Count '1's with POPCNT.
    Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
    Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
    // Scale is an element size in bytes.
    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
                                    AddrVT);
    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
  } else
    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);

  return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}

static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
                                       SDValue Idx,
                                       EVT VecVT,
                                       const SDLoc &dl) {
  if (isa<ConstantSDNode>(Idx))
    return Idx;

  EVT IdxVT = Idx.getValueType();
  unsigned NElts = VecVT.getVectorNumElements();
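  // Clamp the index to [0, NElts - 1]. For a power-of-2 element count this is
  // a cheap AND with NElts - 1 (e.g. 8 elements -> Idx & 7); otherwise fall
  // back to an unsigned min against NElts - 1.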
  if (isPowerOf2_32(NElts)) {
    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
                                     Log2_32(NElts));
    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
                       DAG.getConstant(Imm, dl, IdxVT));
  }

  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
                     DAG.getConstant(NElts - 1, dl, IdxVT));
}

SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
                                                SDValue VecPtr, EVT VecVT,
                                                SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getSizeInBits() &&
         "Converting bits to bytes lost precision");

  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);

  EVT IdxVT = Index.getValueType();

  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
}

//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//

SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
  // calls. At least for X86 targets; maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}

SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
  if (!isCtlzFast())
    return SDValue();
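  // (x == 0) can be computed branchlessly as ctlz(x) >> log2(BitWidth):
  // only x == 0 gives ctlz(x) == BitWidth, the sole value with that high bit
  // set. Illustrative example (not from the source) for i32: ctlz(0) = 32 and
  // 32 >> 5 = 1, while any x != 0 has ctlz(x) <= 31, so the shift yields 0.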
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, dl, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
  }
  return SDValue();
}

SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }
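  // uadd.sat(a, b) -> umin(a, ~b) + b: if a <= ~b the sum cannot wrap and we
  // get a + b; otherwise ~b + b, i.e. the all-ones saturation value.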
  if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
                               LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  } else if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  } else {
    // SatMax -> Overflow && SumDiff < 0
    // SatMin -> Overflow && SumDiff >= 0
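    // On signed overflow the wrapped sum has the opposite sign of the true
    // result. Illustrative example (not from the source) on i8: 100 + 100
    // wraps to -56 (< 0), so saturate to 127; (-100) + (-100) wraps to 56
    // (>= 0), so saturate to -128.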
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
    SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
    Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
  }
}

SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
      return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    }
  }

  bool Signed =
      Node->getOpcode() == ISD::SMULFIX ||
      Node->getOpcode() == ISD::SMULFIXSAT;
  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
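  // Illustrative example (not from the source): i16 smul.fix with Scale = 8
  // (a Q8.8 format) on 1.5 (0x0180) and 2.5 (0x0280) yields the 32-bit
  // product 0x0003C000; funnel-shifting Hi:Lo right by 8 keeps 0x03C0,
  // i.e. 3.75 in Q8.8.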
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
  SDValue HiMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
  SDValue LoMask = DAG.getConstant(
      APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
  APInt MaxVal = APInt::getSignedMaxValue(VTSize);
  APInt MinVal = APInt::getSignedMinValue(VTSize);

  Result = DAG.getSelectCC(dl, Hi, LoMask,
                           DAG.getConstant(MaxVal, dl, VT), Result,
                           ISD::SETGT);
  return DAG.getSelectCC(dl, Hi, HiMask,
                         DAG.getConstant(MinVal, dl, VT), Result,
                         ISD::SETLT);
}

void TargetLowering::expandUADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow,
    SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::UADDO;

  // If ADD/SUBCARRY is legal, use that instead.
  unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
                                    { LHS, RHS, CarryIn });
    Result = SDValue(NodeCarry.getNode(), 0);
    Overflow = SDValue(NodeCarry.getNode(), 1);
    return;
  }
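  // An unsigned add wraps iff the result is less than either operand, and an
  // unsigned sub borrows iff the result is greater than the minuend, so a
  // single compare of Result against LHS detects overflow.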
  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT SetCCType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
  SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}

void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow,
    SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // LHSSign -> LHS >= 0
  // RHSSign -> RHS >= 0
  // SumSign -> Result >= 0
  //
  // Add:
  // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
  // Sub:
  // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
  SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
  SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
  SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
                                    IsAdd ? ISD::SETEQ : ISD::SETNE);

  SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
  SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);

  SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
  Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
}

bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is the same as umulo(x, signed_min).
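      // Illustrative example (not from the source): i8 umulo(96, 4) shifts to
      // 96 << 2 = 0x80 after truncation; shifting back gives 0x80 >> 2 = 32
      // != 96, correctly flagging that 384 does not fit in 8 bits.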
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  EVT WideVT =
      EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getConstant(VT.getScalarSizeInBits(), dl,
                        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of the
      // low part.
      unsigned LoSize = VT.getSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
      HiLHS = DAG.getConstant(0, dl, VT);
      HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
                        /* doesNotReturn */ false,
                        /* isReturnValueUsed */ true,
                        /* isPostTypeLegalization */ true).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
                        /* doesNotReturn */ false,
                        /* isReturnValueUsed */ true,
                        /* isPostTypeLegalization */ true).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
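  // For signed multiply the product fits iff the top half is the sign
  // extension of the bottom half, i.e. TopHalf == BottomHalf >> (width - 1)
  // arithmetically; for unsigned it fits iff the top half is zero.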
  if (isSigned) {
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}

SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  bool NoNaN = Node->getFlags().hasNoNaNs();
  unsigned BaseOpcode = 0;
  switch (Node->getOpcode()) {
  default: llvm_unreachable("Expected VECREDUCE opcode");
  case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
  case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
  case ISD::VECREDUCE_ADD:  BaseOpcode = ISD::ADD; break;
  case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;
  case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;
  case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;
  case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;
  case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
  case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
  case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
  case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
  case ISD::VECREDUCE_FMAX:
    BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
    break;
  case ISD::VECREDUCE_FMIN:
    BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
    break;
  }

  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  // Try to use a shuffle reduction for power of two vectors.
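  // Illustrative example (not from the source): a v8i32 add reduction halves
  // to v4i32, v2i32, then v1i32 with three vector adds (as long as each half
  // type supports the op), leaving a single element to extract below.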
  if (VT.isPow2VectorType()) {
    while (VT.getVectorNumElements() > 1) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
      VT = HalfVT;
    }
  }

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}