DAGCombiner.cpp (807 KB, ~14,308 lines)

430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462114622146231462414625146261462714628146291463014631146321463314634146351463614637146381463914640146411464214643146441464514646146471464814649146501465114652146531465414655146561465714658146591466014661146621466314664146651466614667146681466914670146711467214673146741467514676146771467814679146801468114682146831468414685146861468714688146891469014691146921469314694146951469614697146981469914700147011470214703147041470514706147071470814709147101471114712147131471414715147161471714718147191472014721147221472314724147251472614727147281472914730147311473214733147341473514736147371473814739147401474114742147431474414745147461474714748147491475014751147521475314754147551475614757147581475914760147611476214763147641476514766147671476814769147701477114772147731477414775147761477714778147791478014781147821478314784147851478614787147881478914790147911479214793147941479514796147971479814799148001480114802148031480414805148061480714808148091481014811148121481314814148151481614817148181481914820148211482214823148241482514826148271482814829148301483114832148331483414835148361483714838148391484014841148421484314844148451484614847148481484914850148511485214853148541485514856148571485814859148601486114862148631486414865148661486714868148691487014871148721487314874148751487614877148781487914880148811488214883148841488514886148871488814889148901489114892148931489414895148961489714898148991490014901149021490314904149051490614907149081490914910149111491214913149141491514916149171491814919149201492114922149231492414925149261492714928149291493014931149321493314934149351493614937149381493914940149411494214943149441494514946149471494814949149501495114952149531495414955149561495714958149591496014961149621496314964149651496614967149681496914970149711497214973149741497514976149771497814979149801498114982149831498414985149861498714988149891499014991149921499314994149951499614997149981499915000150011500215003150041500515006150071500815009150101501115012150131501415015150161501715018150191
502015021150221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211542215423154241542515426154271542815429154301543115432154331543415435154361543715438154391544015441154421544315444154451544615447154481544915450154511545215453154541545515456154571545815459154601546115462154631546415465154661546715468154691547015471154721547315474154751547615477154781547915480154811548215483154841548515486154871548815489154901549115492154931549415495154961549715498154991550015501155021550315504155051550615507155081550915510155111551215513155141551515516155171551815519155201552115522155231552415525155261552715528155291553015531155321553315534155351553615537155381553915540155411554215543155441554515546155471554815549155501555115552155531555415555155561555715558155591556015561155621556315564155651556615567155681556915570155711557215573155741557515576155771557815579155801558115582155831558415585155861558715588155891559015591155921559315594155951559615597155981559915600156011560215603156041560515606156071560815609156101561115612156131561415615156161561715618156191562015621156221562315624156251562615627156281562915630156311563215633156341563515636156371563815639156401564115642156431564415645156461564715648156491565015651156521565315654156551565615657156581565915660156611566215663156641566515666156671566815669156701567115672156731567415675156761567715678156791568015681156821568315684156851568615687156881568915690156911569215693156941569515696156971569815699157001570115702157031570415705157061570715708157091571015711157121571315714157151571615717157181571915720157211572215723157241572515726157271572815729157301
573115732157331573415735157361573715738157391574015741157421574315744157451574615747157481574915750157511575215753157541575515756157571575815759157601576115762157631576415765157661576715768157691577015771157721577315774157751577615777157781577915780157811578215783157841578515786157871578815789157901579115792157931579415795157961579715798157991580015801158021580315804158051580615807158081580915810158111581215813158141581515816158171581815819158201582115822158231582415825158261582715828158291583015831158321583315834158351583615837158381583915840158411584215843158441584515846158471584815849158501585115852158531585415855158561585715858158591586015861158621586315864158651586615867158681586915870158711587215873158741587515876158771587815879158801588115882158831588415885158861588715888158891589015891158921589315894158951589615897158981589915900159011590215903159041590515906159071590815909159101591115912159131591415915159161591715918159191592015921159221592315924159251592615927159281592915930159311593215933159341593515936159371593815939159401594115942159431594415945159461594715948159491595015951159521595315954159551595615957159581595915960159611596215963159641596515966159671596815969159701597115972159731597415975159761597715978159791598015981159821598315984159851598615987159881598915990159911599215993159941599515996159971599815999160001600116002160031600416005160061600716008160091601016011160121601316014160151601616017160181601916020160211602216023160241602516026160271602816029160301603116032160331603416035160361603716038160391604016041160421604316044160451604616047160481604916050160511605216053160541605516056160571605816059160601606116062160631606416065160661606716068160691607016071160721607316074160751607616077160781607916080160811608216083160841608516086160871608816089160901609116092160931609416095160961609716098160991610016101161021610316104161051610616107161081610916110161111611216113161141611516116161171611816119161201612116122161231612416125161261612716128161291613016131161321613316134161351613616137161381613916140161411614216143161441614516146161471614816149161501615116152161531615416155161561615716158161591616016161161621616316164161651616616167161681616916170161711617216173161741617516176161771617816179161801618116182161831618416185161861618716188161891619016191161921619316194161951619616197161981619916200162011620216203162041620516206162071620816209162101621116212162131621416215162161621716218162191622016221162221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411
644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211662216623166241662516626166271662816629166301663116632166331663416635166361663716638166391664016641166421664316644166451664616647166481664916650166511665216653166541665516656166571665816659166601666116662166631666416665166661666716668166691667016671166721667316674166751667616677166781667916680166811668216683166841668516686166871668816689166901669116692166931669416695166961669716698166991670016701167021670316704167051670616707167081670916710167111671216713167141671516716167171671816719167201672116722167231672416725167261672716728167291673016731167321673316734167351673616737167381673916740167411674216743167441674516746167471674816749167501675116752167531675416755167561675716758167591676016761167621676316764167651676616767167681676916770167711677216773167741677516776167771677816779167801678116782167831678416785167861678716788167891679016791167921679316794167951679616797167981679916800168011680216803168041680516806168071680816809168101681116812168131681416815168161681716818168191682016821168221682316824168251682616827168281682916830168311683216833168341683516836168371683816839168401684116842168431684416845168461684716848168491685016851168521685316854168551685616857168581685916860168611686216863168641686516866168671686816869168701687116872168731687416875168761687716878168791688016881168821688316884168851688616887168881688916890168911689216893168941689516896168971689816899169001690116902169031690416905169061690716908169091691016911169121691316914169151691616917169181691916920169211692216923169241692516926169271692816929169301693116932169331693416935169361693716938169391694016941169421694316944169451694616947169481694916950169511695216953169541695516956169571695816959169601696116962169631696416965169661696716968169691697016971169721697316974169751697616977169781697916980169811698216983169841698516986169871698816989169901699116992169931699416995169961699716998169991700017001170021700317004170051700617007170081700917010170111701217013170141701517016170171701817019170201702117022170231702417025170261702717028170291703017031170321703317034170351703617037170381703917040170411704217043170441704517046170471704817049170501705117052170531705417055170561705717058170591706017061170621706317064170651706617067170681706917070170711707217073170741707517076170771707817079170801708117082170831708417085170861708717088170891709017091170921709317094170951709617097170981709917100171011710217103171041710517106171071710817109171101711117112171131711417115171161711717118171191712017121171221712317124171251712617127171281712917130171311713217133171341713517136171371713817139171401714117142171431714417145171461714717148171491715017151171521
715317154171551715617157171581715917160171611716217163171641716517166171671716817169171701717117172171731717417175171761717717178171791718017181171821718317184171851718617187171881718917190171911719217193171941719517196171971719817199172001720117202172031720417205172061720717208172091721017211172121721317214172151721617217172181721917220172211722217223172241722517226172271722817229172301723117232172331723417235172361723717238172391724017241172421724317244172451724617247172481724917250172511725217253172541725517256172571725817259172601726117262172631726417265172661726717268172691727017271172721727317274172751727617277172781727917280172811728217283172841728517286172871728817289172901729117292172931729417295172961729717298172991730017301173021730317304173051730617307173081730917310173111731217313173141731517316173171731817319173201732117322173231732417325173261732717328173291733017331173321733317334173351733617337173381733917340173411734217343173441734517346173471734817349173501735117352173531735417355173561735717358173591736017361173621736317364173651736617367173681736917370173711737217373173741737517376173771737817379173801738117382173831738417385173861738717388173891739017391173921739317394173951739617397173981739917400174011740217403174041740517406174071740817409174101741117412174131741417415174161741717418174191742017421174221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211782217823178241782517826178271782817829178301783117832178331783417835178361783717838178391784017841178421784317844178451784617847178481784917850178511785217853178541785517856178571785817859178601786117862178631
786417865178661786717868178691787017871178721787317874178751787617877178781787917880178811788217883178841788517886178871788817889178901789117892178931789417895178961789717898178991790017901179021790317904179051790617907179081790917910179111791217913179141791517916179171791817919179201792117922179231792417925179261792717928179291793017931179321793317934179351793617937179381793917940179411794217943179441794517946179471794817949179501795117952179531795417955179561795717958179591796017961179621796317964179651796617967179681796917970179711797217973179741797517976179771797817979179801798117982179831798417985179861798717988179891799017991179921799317994179951799617997179981799918000180011800218003180041800518006180071800818009180101801118012180131801418015180161801718018180191802018021180221802318024180251802618027180281802918030180311803218033180341803518036180371803818039180401804118042180431804418045180461804718048180491805018051180521805318054180551805618057180581805918060180611806218063180641806518066180671806818069180701807118072180731807418075180761807718078180791808018081180821808318084180851808618087180881808918090180911809218093180941809518096180971809818099181001810118102181031810418105181061810718108181091811018111181121811318114181151811618117181181811918120181211812218123181241812518126181271812818129181301813118132181331813418135181361813718138181391814018141181421814318144181451814618147181481814918150181511815218153181541815518156181571815818159181601816118162181631816418165181661816718168181691817018171181721817318174181751817618177181781817918180181811818218183181841818518186181871818818189181901819118192181931819418195181961819718198181991820018201182021820318204182051820618207182081820918210182111821218213182141821518216182171821818219182201822118222182231822418225182261822718228182291823018231182321823318234182351823618237182381823918240182411824218243182441824518246182471824818249182501825118252182531825418255182561825718258182591826018261182621826318264182651826618267182681826918270182711827218273182741827518276182771827818279182801828118282182831828418285182861828718288182891829018291182921829318294182951829618297182981829918300183011830218303183041830518306183071830818309183101831118312183131831418315183161831718318183191832018321183221832318324183251832618327183281832918330183311833218333183341833518336183371833818339183401834118342183431834418345183461834718348183491835018351183521835318354183551835618357183581835918360183611836218363183641836518366183671836818369183701837118372183731837418375183761837718378183791838018381183821838318384183851838618387183881838918390183911839218393183941839518396183971839818399184001840118402184031840418405184061840718408184091841018411184121841318414184151841618417184181841918420184211842218423184241842518426184271842818429184301843118432184331843418435184361843718438184391844018441184421844318444184451844618447184481844918450184511845218453184541845518456184571845818459184601846118462184631846418465184661846718468184691847018471184721847318474184751847618477184781847918480184811848218483184841848518486184871848818489184901849118492184931849418495184961849718498184991850018501185021850318504185051850618507185081850918510185111851218513185141851518516185171851818519185201852118522185231852418525185261852718528185291853018531185321853318534185351853618537185381853918540185411854218543185441854518546185471854818549185501855118552185531855418555185561855718558185591856018561185621856318564185651856618567185681856918570185711857218573185741
857518576185771857818579185801858118582185831858418585185861858718588185891859018591185921859318594185951859618597185981859918600186011860218603186041860518606186071860818609186101861118612186131861418615186161861718618186191862018621186221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211902219023190241902519026190271902819029190301903119032190331903419035190361903719038190391904019041190421904319044190451904619047190481904919050190511905219053190541905519056190571905819059190601906119062190631906419065190661906719068190691907019071190721907319074190751907619077190781907919080190811908219083190841908519086190871908819089190901909119092190931909419095190961909719098190991910019101191021910319104191051910619107191081910919110191111911219113191141911519116191171911819119191201912119122191231912419125191261912719128191291913019131191321913319134191351913619137191381913919140191411914219143191441914519146191471914819149191501915119152191531915419155191561915719158191591916019161191621916319164191651916619167191681916919170191711917219173191741917519176191771917819179191801918119182191831918419185191861918719188191891919019191191921919319194191951919619197191981919919200192011920219203192041920519206192071920819209192101921119212192131921419215192161921719218192191922019221192221922319224192251922619227192281922919230192311923219233192341923519236192371923819239192401924119242192431924419245192461924719248192491925019251192521925319254192551925619257192581925919260192611926219263192641926519266192671926819269192701927119272192731927419275192761927719278192791928019281192821928319284192851
928619287192881928919290192911929219293192941929519296192971929819299193001930119302193031930419305193061930719308193091931019311193121931319314193151931619317193181931919320193211932219323193241932519326193271932819329193301933119332193331933419335193361933719338193391934019341193421934319344193451934619347193481934919350193511935219353193541935519356193571935819359193601936119362193631936419365193661936719368193691937019371193721937319374193751937619377193781937919380193811938219383193841938519386193871938819389193901939119392193931939419395193961939719398193991940019401194021940319404194051940619407194081940919410194111941219413194141941519416194171941819419194201942119422194231942419425194261942719428194291943019431194321943319434194351943619437194381943919440194411944219443194441944519446194471944819449194501945119452194531945419455194561945719458194591946019461194621946319464194651946619467194681946919470194711947219473194741947519476194771947819479194801948119482194831948419485194861948719488194891949019491194921949319494194951949619497194981949919500195011950219503195041950519506195071950819509195101951119512195131951419515195161951719518195191952019521195221952319524195251952619527195281952919530195311953219533195341953519536195371953819539195401954119542195431954419545195461954719548195491955019551195521955319554195551955619557195581955919560195611956219563195641956519566195671956819569195701957119572195731957419575195761957719578195791958019581195821958319584195851958619587195881958919590195911959219593195941959519596195971959819599196001960119602196031960419605196061960719608196091961019611196121961319614196151961619617196181961919620196211962219623196241962519626196271962819629196301963119632196331963419635196361963719638196391964019641196421964319644196451964619647196481964919650196511965219653196541965519656196571965819659196601966119662196631966419665196661966719668196691967019671196721967319674196751967619677196781967919680196811968219683196841968519686196871968819689196901969119692196931969419695196961969719698196991970019701197021970319704197051970619707197081970919710197111971219713197141971519716197171971819719197201972119722197231972419725197261972719728197291973019731197321973319734197351973619737197381973919740197411974219743197441974519746197471974819749197501975119752197531975419755197561975719758197591976019761197621976319764197651976619767197681976919770197711977219773197741977519776197771977819779197801978119782197831978419785197861978719788197891979019791197921979319794197951979619797197981979919800198011980219803198041980519806198071980819809198101981119812198131981419815198161981719818198191982019821198221982319824198251982619827198281982919830198311983219833198341983519836198371983819839198401984119842198431984419845198461984719848198491985019851198521985319854198551985619857198581985919860198611986219863198641986519866198671986819869198701987119872198731987419875198761987719878198791988019881198821988319884198851988619887198881988919890198911989219893198941989519896198971989819899199001990119902199031990419905199061990719908199091991019911199121991319914199151991619917199181991919920199211992219923199241992519926199271992819929199301993119932199331993419935199361993719938199391994019941199421994319944199451994619947199481994919950199511995219953199541995519956199571995819959199601996119962199631996419965199661996719968199691997019971199721997319974199751997619977199781997919980199811998219983199841998519986199871998819989199901999119992199931999419995199961
999719998199992000020001200022000320004200052000620007200082000920010200112001220013200142001520016200172001820019200202002120022200232002420025200262002720028200292003020031200322003320034200352003620037200382003920040200412004220043200442004520046200472004820049200502005120052200532005420055200562005720058200592006020061200622006320064200652006620067200682006920070200712007220073200742007520076200772007820079200802008120082200832008420085200862008720088200892009020091200922009320094200952009620097200982009920100201012010220103201042010520106201072010820109201102011120112201132011420115201162011720118201192012020121201222012320124201252012620127201282012920130201312013220133201342013520136201372013820139201402014120142201432014420145201462014720148201492015020151201522015320154201552015620157201582015920160201612016220163201642016520166201672016820169201702017120172201732017420175201762017720178201792018020181201822018320184201852018620187201882018920190201912019220193201942019520196201972019820199202002020120202202032020420205202062020720208202092021020211202122021320214202152021620217202182021920220202212022220223202242022520226202272022820229202302023120232202332023420235202362023720238202392024020241202422024320244202452024620247202482024920250202512025220253202542025520256202572025820259202602026120262202632026420265202662026720268202692027020271202722027320274202752027620277202782027920280202812028220283202842028520286202872028820289202902029120292202932029420295202962029720298202992030020301203022030320304203052030620307203082030920310203112031220313203142031520316203172031820319203202032120322203232032420325203262032720328203292033020331203322033320334203352033620337203382033920340203412034220343203442034520346203472034820349203502035120352203532035420355203562035720358203592036020361203622036320364203652036620367203682036920370203712037220373203742037520376203772037820379203802038120382203832038420385203862038720388203892039020391203922039320394203952039620397203982039920400204012040220403204042040520406204072040820409204102041120412204132041420415204162041720418204192042020421204222042320424204252042620427204282042920430204312043220433204342043520436204372043820439204402044120442204432044420445204462044720448204492045020451204522045320454204552045620457204582045920460204612046220463204642046520466204672046820469204702047120472204732047420475204762047720478204792048020481204822048320484204852048620487204882048920490204912049220493204942049520496204972049820499205002050120502205032050420505205062050720508205092051020511205122051320514205152051620517205182051920520205212052220523205242052520526205272052820529205302053120532205332053420535205362053720538205392054020541205422054320544205452054620547205482054920550205512055220553205542055520556205572055820559205602056120562205632056420565205662056720568205692057020571205722057320574205752057620577205782057920580205812058220583205842058520586205872058820589205902059120592205932059420595205962059720598205992060020601206022060320604206052060620607206082060920610206112061220613206142061520616206172061820619206202062120622206232062420625206262062720628206292063020631206322063320634206352063620637206382063920640206412064220643206442064520646206472064820649206502065120652206532065420655206562065720658206592066020661206622066320664206652066620667206682066920670206712067220673206742067520676206772067820679206802068120682206832068420685206862068720688206892069020691206922069320694206952069620697206982069920700207012070220703207042070520706207072
070820709207102071120712207132071420715207162071720718207192072020721207222072320724207252072620727207282072920730207312073220733207342073520736207372073820739207402074120742207432074420745207462074720748207492075020751207522075320754207552075620757207582075920760207612076220763207642076520766207672076820769207702077120772207732077420775207762077720778207792078020781207822078320784207852078620787207882078920790207912079220793207942079520796207972079820799208002080120802208032080420805208062080720808208092081020811208122081320814208152081620817208182081920820208212082220823208242082520826208272082820829208302083120832208332083420835208362083720838208392084020841208422084320844208452084620847208482084920850208512085220853208542085520856208572085820859208602086120862208632086420865208662086720868208692087020871208722087320874208752087620877208782087920880208812088220883208842088520886208872088820889208902089120892208932089420895208962089720898208992090020901209022090320904209052090620907209082090920910209112091220913209142091520916209172091820919209202092120922209232092420925209262092720928209292093020931209322093320934209352093620937209382093920940209412094220943209442094520946209472094820949209502095120952209532095420955209562095720958209592096020961209622096320964209652096620967209682096920970209712097220973209742097520976209772097820979
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;
#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                     cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
            cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                       cl::desc("Only use DAG-combiner alias analysis in this"
                                " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
    StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                      cl::desc("Bypass the profitability model of load slicing"),
                      cl::init(false));

static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));
namespace {

class DAGCombiner {
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  CombineLevel Level;
  CodeGenOpt::Level OptLevel;
  bool LegalOperations = false;
  bool LegalTypes = false;
  bool ForCodeSize;

  /// Worklist of all of the nodes that need to be simplified.
  ///
  /// This must behave as a stack -- new nodes to process are pushed onto the
  /// back and when processing we pop off of the back.
  ///
  /// The worklist will not contain duplicates but may contain null entries
  /// due to nodes being deleted from the underlying DAG.
  SmallVector<SDNode *, 64> Worklist;

  /// Mapping from an SDNode to its position on the worklist.
  ///
  /// This is used to find and remove nodes from the worklist (by nulling
  /// them) when they are deleted from the underlying DAG. It relies on
  /// stable indices of nodes within the worklist.
  DenseMap<SDNode *, unsigned> WorklistMap;
  /// This records all nodes attempted to be added to the worklist since we
  /// considered a new worklist entry. Since we do not add duplicate nodes
  /// to the worklist, this is different from the tail of the worklist.
  SmallSetVector<SDNode *, 32> PruningList;
  /// Set of nodes which have been combined (at least once).
  ///
  /// This is used to allow us to reliably add any operands of a DAG node
  /// which have not yet been combined to the worklist.
  SmallPtrSet<SDNode *, 32> CombinedNodes;

  /// Map from candidate StoreNode to the pair of RootNode and count.
  /// The count is used to track how many times we have seen the StoreNode
  /// with the same RootNode bail out in the dependence check. If we have
  /// seen the bail out for the same pair many times over a limit, we won't
  /// consider the StoreNode with the same RootNode as a store merging
  /// candidate again.
  DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

  // AA - Used for DAG load/store alias analysis.
  AliasAnalysis *AA;

  /// When an instruction is simplified, add all users of the instruction to
  /// the worklist because they might get more simplified now.
  void AddUsersToWorklist(SDNode *N) {
    for (SDNode *Node : N->uses())
      AddToWorklist(Node);
  }
  // Prune potentially dangling nodes. This is called after
  // any visit to a node, but should also be called during a visit after any
  // failed combine which may have created a DAG node.
  void clearAddedDanglingWorklistEntries() {
    // Check any nodes added to the worklist to see if they are prunable.
    while (!PruningList.empty()) {
      auto *N = PruningList.pop_back_val();
      if (N->use_empty())
        recursivelyDeleteUnusedNodes(N);
    }
  }
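
  // Illustrative sketch (not code from this file): if a combine
  // speculatively created a node X, recorded it via ConsiderForPruning(X),
  // and then failed without using X, the loop above finds X->use_empty()
  // and deletes X before the next worklist entry is processed:
  //
  //   ConsiderForPruning(X);               // X later ends up unused
  //   clearAddedDanglingWorklistEntries(); // X is use_empty() -> deleted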
  SDNode *getNextWorklistEntry() {
    // Before we do any work, remove nodes that are not in use.
    clearAddedDanglingWorklistEntries();
    SDNode *N = nullptr;
    // The Worklist holds the SDNodes in order, but it may contain null
    // entries.
    while (!N && !Worklist.empty()) {
      N = Worklist.pop_back_val();
    }
    if (N) {
      bool GoodWorklistEntry = WorklistMap.erase(N);
      (void)GoodWorklistEntry;
      assert(GoodWorklistEntry &&
             "Found a worklist entry without a corresponding map entry!");
    }
    return N;
  }
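
  // A minimal sketch of the loop that consumes these entries (the actual
  // driver is DAGCombiner::Run, later in this file; details simplified):
  //
  //   while (SDNode *N = getNextWorklistEntry()) {
  //     SDValue RV = combine(N);
  //     // ... on success, replace uses of N with RV ...
  //   }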
  /// Call the node-specific routine that folds each particular type of node.
  SDValue visit(SDNode *N);

public:
  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), AA(AA) {
    ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();

    MaximumLegalStoreInBits = 0;
    for (MVT VT : MVT::all_valuetypes())
      if (EVT(VT).isSimple() && VT != MVT::Other &&
          TLI.isTypeLegal(EVT(VT)) &&
          VT.getSizeInBits() >= MaximumLegalStoreInBits)
        MaximumLegalStoreInBits = VT.getSizeInBits();
  }
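
  // E.g. (illustrative): on a target whose widest legal simple type is a
  // 128-bit vector, the loop above leaves MaximumLegalStoreInBits == 128.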
  void ConsiderForPruning(SDNode *N) {
    // Mark this for potential pruning.
    PruningList.insert(N);
  }

  /// Add to the worklist making sure its instance is at the back (next to be
  /// processed).
  void AddToWorklist(SDNode *N) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Deleted Node added to Worklist");

    // Skip handle nodes as they can't usefully be combined and confuse the
    // zero-use deletion strategy.
    if (N->getOpcode() == ISD::HANDLENODE)
      return;

    ConsiderForPruning(N);

    if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
      Worklist.push_back(N);
  }
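
  // Usage sketch: adding the same node twice leaves a single worklist
  // entry, because the WorklistMap insertion fails the second time:
  //
  //   AddToWorklist(N);
  //   AddToWorklist(N); // no-op: WorklistMap already maps N to its slot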
  /// Remove all instances of N from the worklist.
  void removeFromWorklist(SDNode *N) {
    CombinedNodes.erase(N);
    PruningList.remove(N);
    StoreRootCountMap.erase(N);

    auto It = WorklistMap.find(N);
    if (It == WorklistMap.end())
      return; // Not in the worklist.

    // Null out the entry rather than erasing it to avoid a linear operation.
    Worklist[It->second] = nullptr;
    WorklistMap.erase(It);
  }

  void deleteAndRecombine(SDNode *N);
  bool recursivelyDeleteUnusedNodes(SDNode *N);

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                    bool AddTo = true);

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    return CombineTo(N, &Res, 1, AddTo);
  }

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                    bool AddTo = true) {
    SDValue To[] = { Res0, Res1 };
    return CombineTo(N, To, 2, AddTo);
  }
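
  // Typical use from a visit routine (sketch; NewVal/NewLo/NewHi are
  // hypothetical values produced by a successful fold):
  //
  //   return CombineTo(N, NewVal);       // single-result replacement
  //   return CombineTo(N, NewLo, NewHi); // node with two results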
  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

private:
  unsigned MaximumLegalStoreInBits;

  /// Check the specified integer node value to see if it can be simplified or
  /// if things it uses can be simplified by bit propagation.
  /// If so, return true.
  bool SimplifyDemandedBits(SDValue Op) {
    unsigned BitWidth = Op.getScalarValueSizeInBits();
    APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
    return SimplifyDemandedBits(Op, DemandedBits);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
    EVT VT = Op.getValueType();
    unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
    APInt DemandedElts = APInt::getAllOnesValue(NumElts);
    return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
  }
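
  // Calling-pattern sketch (hypothetical example, not from this file):
  // demand only the low bit of a boolean-like operand, letting bit
  // propagation simplify whatever computes the unused high bits:
  //
  //   APInt Demanded = APInt::getOneBitSet(BitWidth, 0);
  //   if (SimplifyDemandedBits(Op, Demanded))
  //     return SDValue(N, 0);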
  /// Check the specified vector node value to see if it can be simplified or
  /// if things it uses can be simplified as it only uses some of the
  /// elements. If so, return true.
  bool SimplifyDemandedVectorElts(SDValue Op) {
    unsigned NumElts = Op.getValueType().getVectorNumElements();
    APInt DemandedElts = APInt::getAllOnesValue(NumElts);
    return SimplifyDemandedVectorElts(Op, DemandedElts);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            const APInt &DemandedElts);
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                  bool AssumeSingleUse = false);

  bool CombineToPreIndexedLoadStore(SDNode *N);
  bool CombineToPostIndexedLoadStore(SDNode *N);
  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  bool SliceUpLoad(SDNode *N);

  // Scalars have size 0 to distinguish from singleton vectors.
  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  /// load.
  ///
  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  /// \param EltNo index of the vector element to load.
  /// \param OriginalLoad load that EVE came from to be replaced.
  /// \returns EVE on success, SDValue() on failure.
  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                       SDValue EltNo,
                                       LoadSDNode *OriginalLoad);
  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue PromoteIntBinOp(SDValue Op);
  SDValue PromoteIntShiftOp(SDValue Op);
  SDValue PromoteExtend(SDValue Op);
  bool PromoteLoad(SDValue Op);

  /// Call the node-specific routine that knows how to fold each
  /// particular type of node. If that doesn't do anything, try the
  /// target-specific DAG combines.
  SDValue combine(SDNode *N);

  // Visitation implementation - Implement dag node combining for different
  // node types. The semantics are as follows:
  // Return Value:
  //   SDValue.getNode() == 0 - No change was made
  //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  //   otherwise              - N should be replaced by the returned Operand.
  //
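  // A caller can distinguish the three cases like so (sketch):
  //
  //   SDValue RV = visit(N);
  //   if (!RV.getNode())          { /* no change was made */ }
  //   else if (RV.getNode() == N) { /* N already replaced and handled */ }
  //   else                        { /* replace all uses of N with RV */ }
  //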
  SDValue visitTokenFactor(SDNode *N);
  SDValue visitMERGE_VALUES(SDNode *N);
  SDValue visitADD(SDNode *N);
  SDValue visitADDLike(SDNode *N);
  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitSUB(SDNode *N);
  SDValue visitADDSAT(SDNode *N);
  SDValue visitSUBSAT(SDNode *N);
  SDValue visitADDC(SDNode *N);
  SDValue visitADDO(SDNode *N);
  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitSUBC(SDNode *N);
  SDValue visitSUBO(SDNode *N);
  SDValue visitADDE(SDNode *N);
  SDValue visitADDCARRY(SDNode *N);
  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
  SDValue visitSUBE(SDNode *N);
  SDValue visitSUBCARRY(SDNode *N);
  SDValue visitMUL(SDNode *N);
  SDValue visitMULFIX(SDNode *N);
  SDValue useDivRem(SDNode *N);
  SDValue visitSDIV(SDNode *N);
  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitUDIV(SDNode *N);
  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitREM(SDNode *N);
  SDValue visitMULHU(SDNode *N);
  SDValue visitMULHS(SDNode *N);
  SDValue visitSMUL_LOHI(SDNode *N);
  SDValue visitUMUL_LOHI(SDNode *N);
  SDValue visitMULO(SDNode *N);
  SDValue visitIMINMAX(SDNode *N);
  SDValue visitAND(SDNode *N);
  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitOR(SDNode *N);
  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitXOR(SDNode *N);
  SDValue SimplifyVBinOp(SDNode *N);
  SDValue visitSHL(SDNode *N);
  SDValue visitSRA(SDNode *N);
  SDValue visitSRL(SDNode *N);
  SDValue visitFunnelShift(SDNode *N);
  SDValue visitRotate(SDNode *N);
  SDValue visitABS(SDNode *N);
  SDValue visitBSWAP(SDNode *N);
  SDValue visitBITREVERSE(SDNode *N);
  SDValue visitCTLZ(SDNode *N);
  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTTZ(SDNode *N);
  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTPOP(SDNode *N);
  SDValue visitSELECT(SDNode *N);
  SDValue visitVSELECT(SDNode *N);
  SDValue visitSELECT_CC(SDNode *N);
  SDValue visitSETCC(SDNode *N);
  SDValue visitSETCCCARRY(SDNode *N);
  SDValue visitSIGN_EXTEND(SDNode *N);
  SDValue visitZERO_EXTEND(SDNode *N);
  SDValue visitANY_EXTEND(SDNode *N);
  SDValue visitAssertExt(SDNode *N);
  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitTRUNCATE(SDNode *N);
  SDValue visitBITCAST(SDNode *N);
  SDValue visitBUILD_PAIR(SDNode *N);
  SDValue visitFADD(SDNode *N);
  SDValue visitFSUB(SDNode *N);
  SDValue visitFMUL(SDNode *N);
  SDValue visitFMA(SDNode *N);
  SDValue visitFDIV(SDNode *N);
  SDValue visitFREM(SDNode *N);
  SDValue visitFSQRT(SDNode *N);
  SDValue visitFCOPYSIGN(SDNode *N);
  SDValue visitFPOW(SDNode *N);
  SDValue visitSINT_TO_FP(SDNode *N);
  SDValue visitUINT_TO_FP(SDNode *N);
  SDValue visitFP_TO_SINT(SDNode *N);
  SDValue visitFP_TO_UINT(SDNode *N);
  SDValue visitFP_ROUND(SDNode *N);
  SDValue visitFP_EXTEND(SDNode *N);
  SDValue visitFNEG(SDNode *N);
  SDValue visitFABS(SDNode *N);
  SDValue visitFCEIL(SDNode *N);
  SDValue visitFTRUNC(SDNode *N);
  SDValue visitFFLOOR(SDNode *N);
  SDValue visitFMINNUM(SDNode *N);
  SDValue visitFMAXNUM(SDNode *N);
  SDValue visitFMINIMUM(SDNode *N);
  SDValue visitFMAXIMUM(SDNode *N);
  SDValue visitBRCOND(SDNode *N);
  SDValue visitBR_CC(SDNode *N);
  SDValue visitLOAD(SDNode *N);
  407. SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
  408. SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
  409. SDValue visitSTORE(SDNode *N);
  410. SDValue visitLIFETIME_END(SDNode *N);
  411. SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  412. SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  413. SDValue visitBUILD_VECTOR(SDNode *N);
  414. SDValue visitCONCAT_VECTORS(SDNode *N);
  415. SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  416. SDValue visitVECTOR_SHUFFLE(SDNode *N);
  417. SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  418. SDValue visitINSERT_SUBVECTOR(SDNode *N);
  419. SDValue visitMLOAD(SDNode *N);
  420. SDValue visitMSTORE(SDNode *N);
  421. SDValue visitMGATHER(SDNode *N);
  422. SDValue visitMSCATTER(SDNode *N);
  423. SDValue visitFP_TO_FP16(SDNode *N);
  424. SDValue visitFP16_TO_FP(SDNode *N);
  425. SDValue visitVECREDUCE(SDNode *N);
  426. SDValue visitFADDForFMACombine(SDNode *N);
  427. SDValue visitFSUBForFMACombine(SDNode *N);
  428. SDValue visitFMULForFMADistributiveCombine(SDNode *N);
  429. SDValue XformToShuffleWithZero(SDNode *N);
  bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                  const SDLoc &DL, SDValue N0,
                                                  SDValue N1);
  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1);
  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                         SDValue N1, SDNodeFlags Flags);

  SDValue visitShiftByConstant(SDNode *N);

  SDValue foldSelectOfConstants(SDNode *N);
  SDValue foldVSelectOfConstants(SDNode *N);
  SDValue foldBinOpIntoSelect(SDNode *BO);
  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                           SDValue N2, SDValue N3, ISD::CondCode CC,
                           bool NotExtCompare = false);
  SDValue convertSelectOfFPConstantsToLoadOffset(
      const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
      ISD::CondCode CC);
  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                 SDValue N2, SDValue N3, ISD::CondCode CC);
  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                            const SDLoc &DL);
  SDValue unfoldMaskedMerge(SDNode *N);
  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        const SDLoc &DL, bool foldBooleans);
  SDValue rebuildSetCC(SDValue N);

  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                         SDValue &CC) const;
  bool isOneUseSetCC(SDValue N) const;
  bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);

  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                     unsigned HiOp);
  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  SDValue CombineExtLoad(SDNode *N);
  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
  SDValue combineRepeatedFPDivisors(SDNode *N);
  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  SDValue BuildSDIV(SDNode *N);
  SDValue BuildSDIVPow2(SDNode *N);
  SDValue BuildUDIV(SDNode *N);
  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
  SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                             bool DemandHighBits = true);
  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                            SDValue InnerPos, SDValue InnerNeg,
                            unsigned PosOpcode, unsigned NegOpcode,
                            const SDLoc &DL);
  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
  SDValue MatchLoadCombine(SDNode *N);
  SDValue MatchStoreCombine(StoreSDNode *N);
  SDValue ReduceLoadWidth(SDNode *N);
  SDValue ReduceLoadOpStoreWidth(SDNode *N);
  SDValue splitMergedValStore(StoreSDNode *ST);
  SDValue TransformFPLoadStorePair(SDNode *N);
  SDValue convertBuildVecZextToZext(SDNode *N);
  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  SDValue reduceBuildVecToShuffle(SDNode *N);
  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                ArrayRef<int> VectorMask, SDValue VecIn1,
                                SDValue VecIn2, unsigned LeftIdx,
                                bool DidSplitVec);
  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
  /// Walk up chain skipping non-aliasing memory nodes,
  /// looking for aliasing nodes and adding them to the Aliases vector.
  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                        SmallVectorImpl<SDValue> &Aliases);

  /// Return true if there is any possibility that the two addresses overlap.
  bool isAlias(SDNode *Op0, SDNode *Op1) const;

  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
  /// chain (aliasing node).
  SDValue FindBetterChain(SDNode *N, SDValue Chain);

  /// Try to replace a store and any possibly adjacent stores on
  /// consecutive chains with better chains. Return true only if St is
  /// replaced.
  ///
  /// Notice that other chains may still be replaced even if the function
  /// returns false.
  bool findBetterNeighborChains(StoreSDNode *St);

  // Helper for findBetterNeighborChains. Walk up the store chain, adding
  // chained stores that do not overlap and can be parallelized.
  bool parallelizeChainedStores(StoreSDNode *St);

  /// Holds a pointer to an LSBaseSDNode as well as information on where it
  /// is located in a sequence of memory operations connected by a chain.
  struct MemOpLink {
    // Ptr to the mem node.
    LSBaseSDNode *MemNode;

    // Offset from the base ptr.
    int64_t OffsetFromBase;

    MemOpLink(LSBaseSDNode *N, int64_t Offset)
        : MemNode(N), OffsetFromBase(Offset) {}
  };

  /// This is a helper function for visitMUL to check the profitability
  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  /// MulNode is the original multiply, AddNode is (add x, c1),
  /// and ConstNode is c2.
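  /// As an illustration (not part of the original comment), with c1 = 4 and
  /// c2 = 3 this asks whether rewriting (mul (add x, 4), 3) as
  /// (add (mul x, 3), 12) pays off, which depends on whether the original
  /// (add x, 4) stays live through other uses anyway.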
  bool isMulAddWithConstProfitable(SDNode *MulNode,
                                   SDValue &AddNode,
                                   SDValue &ConstNode);

  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
  /// the type of the loaded value to be extended.
  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                        EVT LoadResultTy, EVT &ExtVT);

  /// Helper function to calculate whether the given Load/Store can have its
  /// width reduced to ExtVT.
  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                         EVT &MemVT, unsigned ShAmt = 0);

  /// Used by BackwardsPropagateMask to find suitable loads.
  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                         SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                         ConstantSDNode *Mask, SDNode *&NodeToMask);
  /// Attempt to propagate a given AND node back to load leaves so that they
  /// can be combined into narrow loads.
  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

  /// Helper function for MergeConsecutiveStores which merges the
  /// component store chains.
  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumStores);

  /// This is a helper function for MergeConsecutiveStores. When the
  /// source elements of the consecutive stores are all constants or
  /// all extracted vector elements, try to merge them into one
  /// larger store introducing bitcasts if necessary. \return True
  /// if a merged store was created.
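  /// For instance, four adjacent i8 constant stores can become a single i32
  /// store of the combined constant when the wider store type is legal for
  /// the target (an illustrative case, not an exhaustive description).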
  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       EVT MemVT, unsigned NumStores,
                                       bool IsConstantSrc, bool UseVector,
                                       bool UseTrunc);

  /// This is a helper function for MergeConsecutiveStores. Stores
  /// that potentially may be merged with St are placed in
  /// StoreNodes. RootNode is a chain predecessor to all store
  /// candidates.
  void getStoreMergeCandidates(StoreSDNode *St,
                               SmallVectorImpl<MemOpLink> &StoreNodes,
                               SDNode *&Root);

  /// Helper function for MergeConsecutiveStores. Checks if
  /// candidate stores have indirect dependency through their
  /// operands. RootNode is the predecessor to all stores calculated
  /// by getStoreMergeCandidates and is used to prune the dependency check.
  /// \return True if safe to merge.
  bool checkMergeStoreCandidatesForDependencies(
      SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
      SDNode *RootNode);

  /// Merge consecutive store operations into a wide store.
  /// This optimization uses wide integers or vectors when possible.
  /// \return true if stores were merged (the affected nodes are stored
  /// as a prefix in \p StoreNodes).
  bool MergeConsecutiveStores(StoreSDNode *St);

  /// Try to transform a truncation where C is a constant:
  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
  ///
  /// \p N needs to be a truncation and its first operand an AND. Other
  /// requirements are checked by the function (e.g. that trunc is
  /// single-use); if they are not met, an empty SDValue is returned.
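  /// For example, truncating an i32 AND to i16:
  ///   (trunc:i16 (and X:i32, 0xFF00)) -> (and (trunc:i16 X), 0xFF00)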
  SDValue distributeTruncateThroughAnd(SDNode *N);

  /// Helper function to determine whether the target supports operation
  /// given by \p Opcode for type \p VT, that is, whether the operation
  /// is legal or custom before legalizing operations, and whether it is
  /// legal (but not custom) after legalization.
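  /// For example, hasOperation(ISD::CTPOP, VT) asks whether a CTPOP of
  /// \p VT may still be formed at this point in combining: custom lowering
  /// counts before operation legalization but not after (an illustrative
  /// use; the helper is opcode-agnostic).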
  bool hasOperation(unsigned Opcode, EVT VT) {
    if (LegalOperations)
      return TLI.isOperationLegal(Opcode, VT);
    return TLI.isOperationLegalOrCustom(Opcode, VT);
  }

public:
  /// Runs the dag combiner on all nodes in the work list
  void Run(CombineLevel AtLevel);

  SelectionDAG &getDAG() const { return DAG; }

  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
  }

  /// This method returns true if we are running before type legalization or
  /// if the specified VT is legal.
  bool isTypeLegal(const EVT &VT) {
    if (!LegalTypes) return true;
    return TLI.isTypeLegal(VT);
  }

  /// Convenience wrapper around TargetLowering::getSetCCResultType
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }

  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                       SDValue OrigLoad, SDValue ExtLoad,
                       ISD::NodeType ExtType);
};
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  // compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

/// Return 1 if we can compute the negated form of the specified expression for
/// the same cost as the expression itself, or 2 if we can compute the negated
/// form more cheaply than the expression itself.
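/// For example, (fneg X) itself is negatible at a profit (returning 2) since
/// negating it simply deletes the fneg, while a ConstantFP is typically
/// negatible at equal cost (returning 1) by flipping its sign.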
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               bool ForCodeSize,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG)
    return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  if (!Op.hasOneUse() &&
      !(Op.getOpcode() == ISD::FP_EXTEND &&
        TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
    return 0;

  // Don't recurse exponentially.
  if (Depth > 6)
    return 0;

  switch (Op.getOpcode()) {
  default: return 0;
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target
    // says the negated constant is legal.
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                            ForCodeSize);
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      return 0;
    if (!LegalOperations)
      return 1;
    if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
      return 1;
    return llvm::all_of(Op->op_values(), [&](SDValue N) {
      return N.isUndef() ||
             TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              ForCodeSize);
    });
  }
  case ISD::FADD:
    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))
      return V;

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        return 0;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);

  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    char V2 = isNegatibleForFree(Op.getOperand(2), LegalOperations, TLI,
                                 Options, ForCodeSize, Depth + 1);
    if (!V2)
      return 0;

    // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
    char V0 = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                 Options, ForCodeSize, Depth + 1);
    char V1 = isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI,
                                 Options, ForCodeSize, Depth + 1);
    char V01 = std::max(V0, V1);
    return V01 ? std::max(V01, V2) : 0;
  }

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);
  }
}
/// If isNegatibleForFree returns true, return the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, bool ForCodeSize,
                                    unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG)
    return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = Op->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::BUILD_VECTOR: {
    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
    }
    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
  }
  case ISD::FADD:
    assert((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
           "Expected NSZ fp-flag");

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP =
            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth + 1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth + 1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1), Flags);

  case ISD::FMA:
  case ISD::FMAD: {
    assert((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
           "Expected NSZ fp-flag");

    SDValue Neg2 = GetNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
                                        ForCodeSize, Depth + 1);

    char V0 = isNegatibleForFree(Op.getOperand(0), LegalOperations,
                                 DAG.getTargetLoweringInfo(), &Options,
                                 ForCodeSize, Depth + 1);
    char V1 = isNegatibleForFree(Op.getOperand(1), LegalOperations,
                                 DAG.getTargetLoweringInfo(), &Options,
                                 ForCodeSize, Depth + 1);
    if (V0 >= V1) {
      // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
      SDValue Neg0 = GetNegatedExpression(
          Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
                         Op.getOperand(1), Neg2, Flags);
    }

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    SDValue Neg1 = GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
                                        ForCodeSize, Depth + 1);
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0), Neg1, Neg2, Flags);
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1));
  case ISD::FP_ROUND:
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth + 1),
                       Op.getOperand(1));
  }
}
// APInts must be the same size for most operations; this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
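// For example, zeroExtendToMatch(LHS, RHS, 1) on an 8-bit LHS and a 16-bit
// RHS widens both to 17 bits: the Offset (1) plus the larger width (16).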
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
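// For instance, on a target whose boolean contents are 0/1, the node
// (select_cc LHS, RHS, 1, 0, CC) behaves exactly like (setcc LHS, RHS, CC),
// so it is reported as setcc-equivalent here.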
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
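// For example, a BUILD_VECTOR of i16 whose operands carry wider-than-16-bit
// APInt constants would be implicitly truncated on use, so the bit-width
// check below rejects it.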
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);

  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;

  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).
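  // For example, if x+4 is a legal addressing-mode offset but x+4004 is not,
  // folding (add (add x, 4000), 4) into (add x, 4004) would force the
  // combined offset out of the load/store and into a separate register.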
  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  if (N0.hasOneUse())
    return false;

  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  for (SDNode *Node : N0->uses()) {
    auto LoadStore = dyn_cast<MemSDNode>(Node);
    if (LoadStore) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
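// A typical success case: (add (add x, 1), 2) becomes (add x, 3) once the
// two constants are folded together by the first branch below.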
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() != Opc)
    return SDValue();

  // Don't reassociate reductions.
  if (N0->getFlags().hasVectorReduction())
    return SDValue();

  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
      return SDValue();
    }
    if (N0.hasOneUse()) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
        return SDValue();
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }
  return SDValue();
}

// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");

  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  // Floating-point reassociation is not allowed without loose FP math.
  if (N0.getValueType().isFloatingPoint() ||
      N1.getValueType().isFloatingPoint())
    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
      return SDValue();

  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
    return Combined;
  return SDValue();
}
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &DemandedElts,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                      TLO, 0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
        Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it
/// is beneficial. e.g. On x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
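/// For instance, an (i16 add a, b) can be rewritten as
/// (trunc:i16 (i32 add (anyext a), (anyext b))), trading the longer i16
/// encodings for plain i32 ones (an illustrative case; the exact extension
/// used comes from PromoteOperand).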
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it
/// is beneficial. e.g. On x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');

    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and be subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//
void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddUsersToWorklist(LN);
        AddToWorklist(LN);
      }
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor: return visitTokenFactor(N);
  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
  case ISD::ADD: return visitADD(N);
  case ISD::SUB: return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT: return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT: return visitSUBSAT(N);
  case ISD::ADDC: return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO: return visitADDO(N);
  case ISD::SUBC: return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO: return visitSUBO(N);
  case ISD::ADDE: return visitADDE(N);
  case ISD::ADDCARRY: return visitADDCARRY(N);
  case ISD::SUBE: return visitSUBE(N);
  case ISD::SUBCARRY: return visitSUBCARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT: return visitMULFIX(N);
  case ISD::MUL: return visitMUL(N);
  case ISD::SDIV: return visitSDIV(N);
  case ISD::UDIV: return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM: return visitREM(N);
  case ISD::MULHU: return visitMULHU(N);
  case ISD::MULHS: return visitMULHS(N);
  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO: return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: return visitIMINMAX(N);
  case ISD::AND: return visitAND(N);
  case ISD::OR: return visitOR(N);
  case ISD::XOR: return visitXOR(N);
  case ISD::SHL: return visitSHL(N);
  case ISD::SRA: return visitSRA(N);
  case ISD::SRL: return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL: return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR: return visitFunnelShift(N);
  case ISD::ABS: return visitABS(N);
  case ISD::BSWAP: return visitBSWAP(N);
  case ISD::BITREVERSE: return visitBITREVERSE(N);
  case ISD::CTLZ: return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ: return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP: return visitCTPOP(N);
  case ISD::SELECT: return visitSELECT(N);
  case ISD::VSELECT: return visitVSELECT(N);
  case ISD::SELECT_CC: return visitSELECT_CC(N);
  case ISD::SETCC: return visitSETCC(N);
  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext: return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE: return visitTRUNCATE(N);
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
  case ISD::FADD: return visitFADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA(N);
  case ISD::FDIV: return visitFDIV(N);
  case ISD::FREM: return visitFREM(N);
  case ISD::FSQRT: return visitFSQRT(N);
  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
  case ISD::FPOW: return visitFPOW(N);
  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
  case ISD::FP_ROUND: return visitFP_ROUND(N);
  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
  case ISD::FNEG: return visitFNEG(N);
  case ISD::FABS: return visitFABS(N);
  case ISD::FFLOOR: return visitFFLOOR(N);
  case ISD::FMINNUM: return visitFMINNUM(N);
  case ISD::FMAXNUM: return visitFMAXNUM(N);
  case ISD::FMINIMUM: return visitFMINIMUM(N);
  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
  case ISD::FCEIL: return visitFCEIL(N);
  case ISD::FTRUNC: return visitFTRUNC(N);
  case ISD::BRCOND: return visitBRCOND(N);
  case ISD::BR_CC: return visitBR_CC(N);
  case ISD::LOAD: return visitLOAD(N);
  case ISD::STORE: return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER: return visitMGATHER(N);
  case ISD::MLOAD: return visitMLOAD(N);
  case ISD::MSCATTER: return visitMSCATTER(N);
  case ISD::MSTORE: return visitMSTORE(N);
  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
  }
  return SDValue();
}
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try to eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a
/// null SDValue.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}
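
// A TokenFactor merges several chain operands into one chain. The visitor
// below flattens nested TokenFactors, drops duplicate and redundant chain
// operands, and prunes operands that are already transitively reachable
// through another operand's chain.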
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;           // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;
      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;
      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Remove Nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another operand.
  // In general we must climb to the EntryNode, but we can exit early if we
  // find all remaining work is associated with just one operand as no further
  // pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as coming from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for pruning to be possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands whose chains were not pruned away.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Do as a single replacement to avoid rewalking use lists.
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      Ops.push_back(N->getOperand(i));
    DAG.ReplaceAllUsesWith(N, Ops.data());
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
/// ConstantSDNode pointer else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
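
// Pull the constant operand of a binop into a select-of-constants feeding it,
// e.g. add (select Cond, CT, CF), C --> select Cond, CT + C, CF + C, so the
// binop folds away entirely.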
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
         "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  auto BinOpcode = BO->getOpcode();
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In the case of a shift, the value and the shift amount may have different
  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
  // type. Bail out if we have swapped operands and the value types do not
  // match. NB: x86 is fine if the operands are not swapped and the shift
  // amount VT is not bigger than the shifted value's VT.
  // TODO: it is possible to check for a shift operation, correct the VTs, and
  // still perform the optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
  SelectOp->setFlags(BO->getFlags());
  return SelectOp;
}

static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}

/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand
/// into a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // We need a constant operand for the add/sub, and the other operand is a
  // logical shift right: add (srl), C or sub C, (srl).
  // TODO - support non-uniform vector amounts.
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
  if (!C || ShiftOp.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift must be of a 'not' value.
  SDValue Not = ShiftOp.getOperand(0);
  if (!Not.hasOneUse() || !isBitwiseNot(Not))
    return SDValue();

  // The shift must be moving the sign bit to the least-significant-bit.
  EVT VT = ShiftOp.getValueType();
  SDValue ShAmt = ShiftOp.getOperand(1);
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
    return SDValue();

  // Eliminate the 'not' by adjusting the shift and add/sub constant:
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
  SDLoc DL(N);
  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}

/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
SDValue DAGCombiner::visitADDLike(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((A-c1)+c2) -> (A+(c2-c1))
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
      SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
                                               N0.getOperand(1).getNode());
      assert(Sub && "Constant folding failed");
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
    }

    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
                                               N0.getOperand(0).getNode());
      assert(Add && "Constant folding failed");
      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
      return RADD;
  }

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold ((A-B)+(C-A)) -> (C-B)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(0) == N1.getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N0.getOperand(1));

  // fold ((A-B)+(B-C)) -> (A-C)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(1) == N1.getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N1.getOperand(1));

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // fold (add (umax X, C), -C) --> (usubsat X, C)
  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
      return (!Max && !Op) ||
             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
    };
    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
                                  /*AllowUndefs*/ true))
      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
                         N0.getOperand(1));
  }

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (isOneOrOneSplat(N1)) {
    // fold (add (xor a, -1), 1) -> (sub 0, a)
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                         N0.getOperand(0));

    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
    if (N0.getOpcode() == ISD::ADD ||
        N0.getOpcode() == ISD::UADDO ||
        N0.getOpcode() == ISD::SADDO) {
      SDValue A, Xor;

      if (isBitwiseNot(N0.getOperand(0))) {
        A = N0.getOperand(1);
        Xor = N0.getOperand(0);
      } else if (isBitwiseNot(N0.getOperand(1))) {
        A = N0.getOperand(0);
        Xor = N0.getOperand(1);
      }

      if (Xor)
        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
    }

    // Look for:
    //   add (add x, y), 1
    // And if the target does not like this form then turn into:
    //   sub y, (xor x, -1)
    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
        N0.getOpcode() == ISD::ADD) {
      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
    }
  }

  // (x - y) + -1  ->  add (xor y, -1), x
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isAllOnesOrAllOnesSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
    return Combined;

  return SDValue();
}
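
// ADD-only folds; anything that also applies to other add-like nodes (e.g. an
// OR with no common bits set) lives in visitADDLike, which is tried first.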
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  if (SDValue Combined = visitADDLike(N))
    return Combined;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    // TODO SimplifyVBinOp

    // fold (add_sat x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add_sat x, undef) -> -1
  if (N0.isUndef() || N1.isUndef())
    return DAG.getAllOnesConstant(DL, VT);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(Opcode, DL, VT, N1, N0);
    // fold (add_sat c1, c2) -> c3
    return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // If it cannot overflow, transform into an add.
  if (Opcode == ISD::UADDSAT)
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

  return SDValue();
}
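
/// Peel off legalization artifacts (TRUNCATE, ZERO_EXTEND, AND with 1) and,
/// if what remains is the carry result (result number 1) of an ADDCARRY,
/// SUBCARRY, UADDO or USUBO whose boolean is known to be 0 or 1, return that
/// carry value; otherwise return an empty SDValue.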
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
  while (true) {
    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // If this is not a carry, return.
  if (V.getResNo() != 1)
    return SDValue();

  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    return SDValue();

  EVT VT = V.getNode()->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
    return SDValue();

  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool type is either 0
  // or 1 and not other values.
  if (Masked ||
      TLI.getBooleanContents(V.getValueType()) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
    return V;

  return SDValue();
}

/// Given the operands of an add/sub operation, see if the 2nd operand is a
/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
/// the opcode and bypass the mask operation.
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
                                 SelectionDAG &DAG, const SDLoc &DL) {
  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
    return SDValue();

  EVT VT = N0.getValueType();
  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
    return SDValue();

  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
}

/// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
    return V;

  // Look for:
  //   add (add x, 1), y
  // And if the target does not like this form then turn into:
  //   sub y, (xor x, -1)
  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
      N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
  }

  // Hoist one-use subtraction by non-opaque constant:
  //   (x - C) + y  ->  (x + y) - C
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
  }

  // Hoist one-use subtraction from non-opaque constant:
  //   (C - x) + y  ->  (y - x) + C
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
  }

  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
  // rather than 'add 0/-1' (the zext should get folded).
  // add (sext i1 Y), X --> sub X, (zext i1 Y)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
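
// Note: ISD::ADDC/ISD::ADDE model the carry with an MVT::Glue result; the
// boolean-carry nodes (UADDO/ADDCARRY) handled above are the preferred form,
// so the folds below are deliberately minimal.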
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}
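
/// Logically negate the boolean value V by XOR'ing it with the constant that
/// represents 'true' under the target's boolean-contents scheme for V's type.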
static SDValue flipBoolean(SDValue V, const SDLoc &DL,
                           SelectionDAG &DAG, const TargetLowering &TLI) {
  EVT VT = V.getValueType();

  SDValue Cst;
  switch (TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
  case TargetLowering::UndefinedBooleanContent:
    Cst = DAG.getConstant(1, DL, VT);
    break;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    Cst = DAG.getAllOnesConstant(DL, VT);
    break;
  }

  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
}

/**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
 * the flip also occurs when computing the inverse has the same cost.
 * This function returns an empty SDValue in case it cannot flip the boolean
 * without increasing the cost of the computation. If you want to flip a
 * boolean no matter what, use flipBoolean.
 */
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
                                  const TargetLowering &TLI,
                                  bool Force) {
  if (Force && isa<ConstantSDNode>(V))
    return flipBoolean(V, SDLoc(V), DAG, TLI);

  if (V.getOpcode() != ISD::XOR)
    return SDValue();

  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
  if (!Const)
    return SDValue();

  EVT VT = V.getValueType();

  bool IsFlip = false;
  switch(TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
    IsFlip = Const->isOne();
    break;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    IsFlip = Const->isAllOnesValue();
    break;
  case TargetLowering::UndefinedBooleanContent:
    IsFlip = (Const->getAPIntValue() & 0x01) == 1;
    break;
  }

  if (IsFlip)
    return V.getOperand(0);
  if (Force)
    return flipBoolean(V, SDLoc(V), DAG, TLI);
  return SDValue();
}

SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  if (!IsSigned) {
    // If it cannot overflow, transform into an add.
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                       DAG.getConstant(0, DL, CarryVT));

    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}
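
/// UADDO folds that treat (N0, N1) as an ordered pair; visitADDO calls this
/// helper with both operand orders.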
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}

SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    EVT CarryVT = CarryIn.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}

/**
 * If we are facing some sort of diamond carry propagation pattern try to
 * break it up to generate something like:
 *   (addcarry X, 0, (addcarry A, B, Z):Carry)
 *
 * The end result is usually an increase in the number of operations required,
 * but because the carry is now linearized, other transforms can kick in and
 * optimize the DAG.
 *
 * Patterns typically look something like
 *            (uaddo A, B)
 *             /       \
 *          Carry      Sum
 *            |          \
 *            |    (addcarry *, 0, Z)
 *            |       /
 *             \   Carry
 *              |   /
 *   (addcarry X, *, *)
 *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
 * produce a combine with a single path for carry propagation.
 */
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
                                      SDValue X, SDValue Carry0, SDValue Carry1,
                                      SDNode *N) {
  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
    return SDValue();
  if (Carry1.getOpcode() != ISD::UADDO)
    return SDValue();

  SDValue Z;

  /**
   * First look for a suitable Z. It will present itself in the form of
   * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
   */
  if (Carry0.getOpcode() == ISD::ADDCARRY &&
      isNullConstant(Carry0.getOperand(1))) {
    Z = Carry0.getOperand(2);
  } else if (Carry0.getOpcode() == ISD::UADDO &&
             isOneConstant(Carry0.getOperand(1))) {
    EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
    Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
  } else {
    // We couldn't find a suitable Z.
    return SDValue();
  }

  auto cancelDiamond = [&](SDValue A, SDValue B) {
    SDLoc DL(N);
    SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
    Combiner.AddToWorklist(NewY.getNode());
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
                       DAG.getConstant(0, DL, X.getValueType()),
                       NewY.getValue(1));
  };

  /**
   *       (uaddo A, B)
   *            |
   *           Sum
   *            |
   *  (addcarry *, 0, Z)
   */
  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
    return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
  }

  /**
   *  (addcarry A, 0, Z)
   *          |
   *         Sum
   *          |
   *    (uaddo *, B)
   */
  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
    return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
  }

  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
    return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
  }

  return SDValue();
}
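
/// ADDCARRY folds that treat (N0, N1) as an ordered pair; visitADDCARRY calls
/// this helper with both operand orders.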
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
  if (isBitwiseNot(N0))
    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
      SDLoc DL(N);
      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
                                N0.getOperand(0), NotC);
      return CombineTo(N, Sub,
                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
    }

  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
  // or the dependency between the instructions.
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
        N0.getValue(1) != CarryIn)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry arguments is itself a carry, we may be facing
   * a diamond carry propagation. In that case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    // Because both are carries, Y and Z can be swapped.
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
      return R;
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
      return R;
  }

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG, bool LegalOperations) {
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  return SDValue();
}
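
// Many of the SUB folds below rewrite subtraction in terms of addition, e.g.
// (sub x, c) -> (add x, -c), so that the ADD combines above get a chance to
// fire on the result.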
  2685. SDValue DAGCombiner::visitSUB(SDNode *N) {
  2686. SDValue N0 = N->getOperand(0);
  2687. SDValue N1 = N->getOperand(1);
  2688. EVT VT = N0.getValueType();
  2689. SDLoc DL(N);
  2690. // fold vector ops
  2691. if (VT.isVector()) {
  2692. if (SDValue FoldedVOp = SimplifyVBinOp(N))
  2693. return FoldedVOp;
  2694. // fold (sub x, 0) -> x, vector edition
  2695. if (ISD::isBuildVectorAllZeros(N1.getNode()))
  2696. return N0;
  2697. }
  2698. // fold (sub x, x) -> 0
  2699. // FIXME: Refactor this and xor and other similar operations together.
  2700. if (N0 == N1)
  2701. return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
  2702. if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
  2703. DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
  2704. // fold (sub c1, c2) -> c1-c2
  2705. return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
  2706. N1.getNode());
  2707. }
  2708. if (SDValue NewSel = foldBinOpIntoSelect(N))
  2709. return NewSel;
  2710. ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  2711. // fold (sub x, c) -> (add x, -c)
  2712. if (N1C) {
  2713. return DAG.getNode(ISD::ADD, DL, VT, N0,
  2714. DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  2715. }
  2716. if (isNullOrNullSplat(N0)) {
  2717. unsigned BitWidth = VT.getScalarSizeInBits();
  2718. // Right-shifting everything out but the sign bit followed by negation is
  2719. // the same as flipping arithmetic/logical shift type without the negation:
  2720. // -(X >>u 31) -> (X >>s 31)
  2721. // -(X >>s 31) -> (X >>u 31)
  2722. if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
  2723. ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
  2724. if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
  2725. auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
  2726. if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
  2727. return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
  2728. }
  2729. }
  2730. // 0 - X --> 0 if the sub is NUW.
  2731. if (N->getFlags().hasNoUnsignedWrap())
  2732. return N0;
  2733. if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
  2734. // N1 is either 0 or the minimum signed value. If the sub is NSW, then
  2735. // N1 must be 0 because negating the minimum signed value is undefined.
  2736. if (N->getFlags().hasNoSignedWrap())
  2737. return N0;
  2738. // 0 - X --> X if X is 0 or the minimum signed value.
  2739. return N1;
  2740. }
  2741. }
  2742. // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  2743. if (isAllOnesOrAllOnesSplat(N0))
  2744. return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
  2745. // fold (A - (0-B)) -> A+B
  2746. if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
  2747. return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
  2748. // fold A-(A-B) -> B
  2749. if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
  2750. return N1.getOperand(1);
  2751. // fold (A+B)-A -> B
  2752. if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
  2753. return N0.getOperand(1);
  2754. // fold (A+B)-B -> A
  2755. if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
  2756. return N0.getOperand(0);
  2757. // fold (A+C1)-C2 -> A+(C1-C2)
  2758. if (N0.getOpcode() == ISD::ADD &&
  2759. isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
  2760. isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
  2761. SDValue NewC = DAG.FoldConstantArithmetic(
  2762. ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
  2763. assert(NewC && "Constant folding failed");
  2764. return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
  2765. }
  2766. // fold C2-(A+C1) -> (C2-C1)-A
  2767. if (N1.getOpcode() == ISD::ADD) {
  2768. SDValue N11 = N1.getOperand(1);
  2769. if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
  2770. isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
  2771. SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
  2772. N11.getNode());
  2773. assert(NewC && "Constant folding failed");
  2774. return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
  2775. }
  2776. }
  2777. // fold (A-C1)-C2 -> A-(C1+C2)
  2778. if (N0.getOpcode() == ISD::SUB &&
  2779. isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
  2780. isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
  2781. SDValue NewC = DAG.FoldConstantArithmetic(
  2782. ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
  2783. assert(NewC && "Constant folding failed");
  2784. return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
  2785. }
  2786. // fold (c1-A)-c2 -> (c1-c2)-A
  2787. if (N0.getOpcode() == ISD::SUB &&
  2788. isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
  2789. isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
  2790. SDValue NewC = DAG.FoldConstantArithmetic(
  2791. ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
  2792. assert(NewC && "Constant folding failed");
  2793. return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
  2794. }
  2795. // fold ((A+(B+or-C))-B) -> A+or-C
  2796. if (N0.getOpcode() == ISD::ADD &&
  2797. (N0.getOperand(1).getOpcode() == ISD::SUB ||
  2798. N0.getOperand(1).getOpcode() == ISD::ADD) &&
  2799. N0.getOperand(1).getOperand(0) == N1)
  2800. return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
  2801. N0.getOperand(1).getOperand(1));
  2802. // fold ((A+(C+B))-B) -> A+C
  2803. if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
  2804. N0.getOperand(1).getOperand(1) == N1)
  2805. return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
  2806. N0.getOperand(1).getOperand(0));
  2807. // fold ((A-(B-C))-C) -> A-B
  2808. if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
  2809. N0.getOperand(1).getOperand(1) == N1)
  2810. return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
  2811. N0.getOperand(1).getOperand(0));
  2812. // fold (A-(B-C)) -> A+(C-B)
  2813. if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
  2814. return DAG.getNode(ISD::ADD, DL, VT, N0,
  2815. DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
  2816. N1.getOperand(0)));
  2817. // fold (X - (-Y * Z)) -> (X + (Y * Z))
  2818. if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
  2819. if (N1.getOperand(0).getOpcode() == ISD::SUB &&
  2820. isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
  2821. SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
  2822. N1.getOperand(0).getOperand(1),
  2823. N1.getOperand(1));
  2824. return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
  2825. }
  2826. if (N1.getOperand(1).getOpcode() == ISD::SUB &&
  2827. isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
  2828. SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
  2829. N1.getOperand(0),
  2830. N1.getOperand(1).getOperand(1));
  2831. return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
  2832. }
  2833. }
  2834. // If either operand of a sub is undef, the result is undef
  2835. if (N0.isUndef())
  2836. return N0;
  2837. if (N1.isUndef())
  2838. return N1;
  2839. if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
  2840. return V;
  2841. if (SDValue V = foldAddSubOfSignBit(N, DAG))
  2842. return V;
  2843. if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
  2844. return V;
  2845. // (x - y) - 1 -> add (xor y, -1), x
  2846. if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
  2847. SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
  2848. DAG.getAllOnesConstant(DL, VT));
  2849. return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  2850. }
  2851. // Look for:
  2852. // sub y, (xor x, -1)
  2853. // And if the target does not like this form then turn into:
  2854. // add (add x, y), 1
  2855. if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
  2856. SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
  2857. return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
  2858. }
  2859. // Hoist one-use addition by non-opaque constant:
  2860. // (x + C) - y -> (x - y) + C
  2861. if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
  2862. isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
  2863. SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
  2864. return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
  2865. }
  2866. // y - (x + C) -> (y - x) - C
  2867. if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
  2868. isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
  2869. SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
  2870. return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
  2871. }
  2872. // (x - C) - y -> (x - y) - C
  2873. // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  2874. if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
  2875. isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
  2876. SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
  2877. return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
  2878. }
  2879. // (C - x) - y -> C - (x + y)
  2880. if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
  2881. isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
  2882. SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
  2883. return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
  2884. }
  2885. // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
  2886. // rather than 'sub 0/1' (the sext should get folded).
  2887. // sub X, (zext i1 Y) --> add X, (sext i1 Y)
  2888. if (N1.getOpcode() == ISD::ZERO_EXTEND &&
  2889. N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
  2890. TLI.getBooleanContents(VT) ==
  2891. TargetLowering::ZeroOrNegativeOneBooleanContent) {
  2892. SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
  2893. return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
  2894. }
  2895. // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
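  // sext_inreg(Y, i1) is 0 or -1 according to bit 0 of Y, i.e. -(Y & 1),
  // so subtracting it is the same as adding (Y & 1).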
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
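  // A logical shift by width-1 yields 0/+1 while an arithmetic shift yields
  // 0/-1, so subtracting the former equals adding the latter.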
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC &&
        ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
    // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
    if (SDValue Carry = getAsCarry(TLI, N0)) {
      SDValue X = N1;
      SDValue Zero = DAG.getConstant(0, DL, VT);
      SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
                         Carry);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    // TODO SimplifyVBinOp

    // fold (sub_sat x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub_sat x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (sub_sat x, x) -> 0
  if (N0 == N1)
    return DAG.getConstant(0, DL, VT);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub_sat c1, c2) -> c3
    return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (sub_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  return SDValue();
}

SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

SDValue DAGCombiner::visitSUBO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SSUBO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // fold (subo x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (subo x, c) -> (addo x, -c)
  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
    return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // fold (subo x, 0) -> x + no borrow
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getConstant(0, DL, CarryVT));

  return SDValue();
}

SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (subcarry x, y, false) -> (usubo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}

// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
SDValue DAGCombiner::visitMULFIX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Scale = N->getOperand(2);
  EVT VT = N0.getValueType();

  // fold (mulfix x, undef, scale) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // Canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);

  // fold (mulfix x, 0, scale) -> 0
  if (isNullConstant(N1))
    return DAG.getConstant(0, SDLoc(N), VT);

  return SDValue();
}

SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);

  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;

  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }

  // fold (mul x, (1 << c)) -> x << c
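  // e.g. (mul x, 16) becomes (shl x, 4); BuildLogBase2 extracts the exponent.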
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }

  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                    getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    APInt MulC = ConstValue1.abs();
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt =
          MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
      assert(ShAmt < VT.getScalarSizeInBits() &&
             "multiply-by-constant generated out of bounds shift");
      SDLoc DL(N);
      SDValue Shl =
          DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
      SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
      if (ConstValue1.isNegative())
        R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
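  // e.g. (mul (shl x, 2), 5) --> (mul x, 20), folding the shift into the
  // constant operand.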
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}

/// Return true if divmod libcall is available.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
                                     const TargetLowering &TLI) {
  RTLIB::Libcall LC;
  EVT NodeType = Node->getValueType(0);
  if (!NodeType.isSimple())
    return false;
  switch (NodeType.getSimpleVT().SimpleTy) {
  default: return false; // No libcall for vector types.
  case MVT::i8:   LC = isSigned ? RTLIB::SDIVREM_I8   : RTLIB::UDIVREM_I8;   break;
  case MVT::i16:  LC = isSigned ? RTLIB::SDIVREM_I16  : RTLIB::UDIVREM_I16;  break;
  case MVT::i32:  LC = isSigned ? RTLIB::SDIVREM_I32  : RTLIB::UDIVREM_I32;  break;
  case MVT::i64:  LC = isSigned ? RTLIB::SDIVREM_I64  : RTLIB::UDIVREM_I64;  break;
  case MVT::i128: LC = isSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; break;
  }

  return TLI.getLibcallName(LC) != nullptr;
}

/// Issue divrem if both quotient and remainder are needed.
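/// For example, if both (sdiv x, y) and (srem x, y) are live, a single
/// SDIVREM node can compute the two results in one operation.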
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}

static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  unsigned Opc = N->getOpcode();
  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // X / undef -> undef
  // X % undef -> undef
  // X / 0 -> undef
  // X % 0 -> undef
  // NOTE: This includes vectors where any divisor element is zero/undef.
  if (DAG.isUndef(Opc, {N0, N1}))
    return DAG.getUNDEF(VT);

  // undef / X -> 0
  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  // 0 / X -> 0
  // 0 % X -> 0
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  if (N0C && N0C->isNullValue())
    return N0;

  // X / X -> 1
  // X % X -> 0
  if (N0 == N1)
    return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);

  // X / 1 -> X
  // X % 1 -> 0
  // If this is a boolean op (single-bit element type), we can't have
  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
  // it's a 1.
  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
    return IsDiv ? N0 : DAG.getConstant(0, DL, VT);

  return SDValue();
}

SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true. Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}

SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
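  // The expansion below biases the dividend so truncation rounds toward zero;
  // e.g. for i32 (sdiv X, 4): Sign = (sra X, 31) is 0 or -1, (srl Sign, 30)
  // is 0 or 3, and adding that to X before the final (sra ..., 2) rounds
  // negative dividends toward zero.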
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence. Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}

SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
    if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
                                              { N0, N1 })) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      AddToWorklist(Sub.getNode());
      CombineTo(RemNode, Sub);
    }
    return V;
  }

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true. Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}

SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
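  // e.g. (udiv x, 8) --> (srl x, 3).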
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
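  // e.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2)).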
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}

// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;
  // fold (urem X, -1) -> select(X == -1, 0, X)
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion. We guard against this
  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
  // combine will not return a DIVREM. Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
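  // e.g. (urem x, 7) becomes x - (x / 7) * 7, where the division can in turn
  // be expanded by BuildUDIV into a multiply-by-constant sequence.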
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode()) {
      // If the equivalent Div node also exists, update its users.
      unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
      if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
                                                { N0, N1 }))
        CombineTo(DivNode, OptimizedDiv);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}

SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;

  // fold (mulhs x, 1) -> (sra x, size(x)-1)
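  // (the high half of sext(x) * 1 is just the sign bits of x)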
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
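  // e.g. for i16: mulhs a, b --> trunc (srl (mul (sext a), (sext b)), 16),
  // with the multiply done in i32.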
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhu x, 0) -> 0
    // do not return N0/N1, because undef node may exist.
    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
        ISD::isBuildVectorAllZeros(N1.getNode()))
      return DAG.getConstant(0, DL, VT);
  }

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;

  // fold (mulhu x, 1) -> 0
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());

  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
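  // e.g. for i32 (mulhu x, 16): the high 32 bits of x * 2^4 are x >> 28.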
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
    unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    SDValue SRLAmt = DAG.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// Perform optimizations common to nodes that compute two values. LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Return the simplified result if a combine was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform this into a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // (umul_lohi N0, 0) -> (0, 0)
  if (isNullConstant(N->getOperand(1))) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return CombineTo(N, Zero, Zero);
  }

  // (umul_lohi N0, 1) -> (N0, 0)
  if (isOneConstant(N->getOperand(1))) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return CombineTo(N, N->getOperand(0), Zero);
  }

  // If the type twice as wide is legal, transform this into a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitMULO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SMULO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // canonicalize constant to RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (mulo x, 0) -> 0 + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  // (mulo x, 2) -> (addo x, x)
  if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
                         N->getVTList(), N0, N0);

  return SDValue();
}

SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold operation with constant operands.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
  // Only do this if the current op isn't legal and the flipped is.
  unsigned Opcode = N->getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegal(Opcode, VT) &&
      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
    unsigned AltOpcode;
    switch (Opcode) {
    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
    if (TLI.isOperationLegal(AltOpcode, VT))
      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
  }

  return SDValue();
}

/// If this is a bitwise logic instruction and both operands have the same
/// opcode, try to sink the other opcode after the logic instruction.
SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned LogicOpcode = N->getOpcode();
  unsigned HandOpcode = N0.getOpcode();
  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
          LogicOpcode == ISD::XOR) && "Expected logic opcode");
  assert(HandOpcode == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0)
    return SDValue();

  // FIXME: We should check number of uses of the operands to not increase
  //        the instruction count for all transforms.

  // Handle size-changing casts.
  SDValue X = N0.getOperand(0);
  SDValue Y = N1.getOperand(0);
  EVT XVT = X.getValueType();
  SDLoc DL(N);
  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
      HandOpcode == ISD::SIGN_EXTEND) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching integer source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization. Don't ever
    // create an unsupported vector op.
    if ((VT.isVector() || LegalOperations) &&
        !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
      return SDValue();
    // Avoid infinite looping with PromoteIntBinOp.
    // TODO: Should we apply desirable/legal constraints to all opcodes?
    if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
        !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
      return SDValue();
    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
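    // e.g. (and (zext i8 a), (zext i8 b)) --> (zext (and a, b)), doing the
    // logic in the narrow type.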
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
  if (HandOpcode == ISD::TRUNCATE) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization.
    if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
      return SDValue();
    // Be extra careful sinking truncate. If it's free, there's no benefit in
    // widening a binop. Also, don't create a logic op on an illegal type.
    if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
      return SDValue();
    if (!TLI.isTypeLegal(XVT))
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // For binops SHL/SRL/SRA/AND:
  //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
       HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
  }

  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
  if (HandOpcode == ISD::BSWAP) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
      Level <= AfterLegalizeTypes) {
    // Input types must be integer and the same.
    if (XVT.isInteger() && XVT == Y.getValueType()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
      return DAG.getNode(HandOpcode, DL, VT, Logic);
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
    auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
    assert(X.getValueType() == Y.getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
        !SVN0->getMask().equals(SVN1->getMask()))
      return SDValue();

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    SDValue ShOp = N0.getOperand(1);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
    if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
                                  N0.getOperand(0), N1.getOperand(0));
      return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
    }

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    ShOp = N0.getOperand(0);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
    if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
                                  N1.getOperand(1));
      return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
    }
  }

  return SDValue();
}

/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullOrNullSplat(LR);
    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }

    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
    // TODO - support non-uniform vector amounts.
    if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
      // Match a shared variable operand and 2 non-opaque constant operands.
      ConstantSDNode *C0 = isConstOrConstSplat(LR);
      ConstantSDNode *C1 = isConstOrConstSplat(RR);
      if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
        // Canonicalize larger constant as C0.
        if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
          std::swap(C0, C1);

        // The difference of the constants must be a single bit.
        const APInt &C0Val = C0->getAPIntValue();
        const APInt &C1Val = C1->getAPIntValue();
        if ((C0Val - C1Val).isPowerOf2()) {
          // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
          // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
  4136. SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
  4137. SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
  4138. SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
  4139. SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
  4140. SDValue Zero = DAG.getConstant(0, DL, OpVT);
  4141. return DAG.getSetCC(DL, VT, And, Zero, CC0);
  4142. }
  4143. }
  4144. }
  4145. }
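
  // As a concrete check of the single-bit-difference fold above: for
  // (and (setne X, 12), (setne X, 8)) we get C0 = 12, C1 = 8, and
  // C0 - C1 = 4 is a power of 2, so this becomes
  // setne (and (add X, -8), ~4), 0. X - 8 is 0 or 4 exactly when X is 8 or
  // 12, and those are the only values the mask ~4 reduces to zero.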

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (and x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
    return V;

  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
      VT.getSizeInBits() <= 64) {
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
        // immediate for an add, but it is legal if its top c2 bits are set,
        // transform the ADD so the immediate doesn't need to be materialized
        // in a register.
        APInt ADDC = ADDI->getAPIntValue();
        APInt SRLC = SRLI->getAPIntValue();
        if (ADDC.getMinSignedBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLC.getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDLoc DL0(N0);
              SDValue NewAdd =
                  DAG.getNode(ISD::ADD, DL0, VT,
                              N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
              CombineTo(N0.getNode(), NewAdd);
              // Return N so it doesn't get rechecked!
              return SDValue(N, 0);
            }
          }
        }
      }
    }
  }
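
  // Sketch of why setting the top c2 bits of c1 is safe: the other operand of
  // the 'and' is (srl y, c2), whose top c2 bits are known zero, so the top c2
  // bits of the 'and' result never depend on the add. With 32-bit values and
  // c2 == 16, any c1' that agrees with c1 in the low 16 bits produces the
  // same final result, so a c1' with the high half set may be chosen if that
  // form is a legal add immediate on the target.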

  // Reduce bit extract of low half of an integer to the narrower type.
  // (and (srl i64:x, K), KMask) ->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
      if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        unsigned Size = VT.getSizeInBits();
        const APInt &AndMask = CAnd->getAPIntValue();
        unsigned ShiftBits = CShift->getZExtValue();

        // Bail out, this node will probably disappear anyway.
        if (ShiftBits == 0)
          return SDValue();

        unsigned MaskBits = AndMask.countTrailingOnes();
        EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);

        if (AndMask.isMask() &&
            // Required bits must not span the two halves of the integer and
            // must fit in the half size type.
            (ShiftBits + MaskBits <= Size / 2) &&
            TLI.isNarrowingProfitable(VT, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
            TLI.isTruncateFree(VT, HalfVT) &&
            TLI.isZExtFree(HalfVT, VT)) {
          // The isNarrowingProfitable is to avoid regressions on PPC and
          // AArch64 which match a few 64-bit bit insert / bit extract patterns
          // on downstream users of this. Those patterns could probably be
          // extended to handle extensions mixed in.

          SDLoc SL(N0);
          assert(MaskBits <= Size);

          // Extracting the highest bit of the low half.
          EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
                                      N0.getOperand(0));

          SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
          SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
          SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
          SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
          return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
        }
      }
    }
  }

  return SDValue();
}

bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                                   EVT LoadResultTy, EVT &ExtVT) {
  if (!AndC->getAPIntValue().isMask())
    return false;

  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();

  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  EVT LoadedVT = LoadN->getMemoryVT();

  if (ExtVT == LoadedVT &&
      (!LegalOperations ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
    // ZEXTLOAD will match without needing to change the size of the value
    // being loaded.
    return true;
  }

  // Do not change the width of a volatile or atomic load.
  if (!LoadN->isSimple())
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
    return false;

  if (LegalOperations &&
      !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
    return false;

  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
    return false;

  return true;
}

bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
                                    ISD::LoadExtType ExtType, EVT &MemVT,
                                    unsigned ShAmt) {
  if (!LDST)
    return false;
  // Only allow byte offsets.
  if (ShAmt % 8)
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!MemVT.isRound())
    return false;

  // Don't change the width of a volatile or atomic load.
  if (!LDST->isSimple())
    return false;

  // Verify that we are actually reducing a load width here.
  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
    return false;

  // Ensure that this isn't going to produce an unsupported unaligned access.
  if (ShAmt &&
      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                              LDST->getAddressSpace(), ShAmt / 8,
                              LDST->getMemOperand()->getFlags()))
    return false;

  // It's not possible to generate a constant of extended or untyped type.
  EVT PtrType = LDST->getBasePtr().getValueType();
  if (PtrType == MVT::Untyped || PtrType.isExtended())
    return false;

  if (isa<LoadSDNode>(LDST)) {
    LoadSDNode *Load = cast<LoadSDNode>(LDST);
    // Don't transform one with multiple uses, this would require adding a new
    // load.
    if (!SDValue(Load, 0).hasOneUse())
      return false;

    if (LegalOperations &&
        !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
      return false;

    // For the transform to be legal, the load must produce only two values
    // (the value loaded and the chain). Don't transform a pre-increment
    // load, for example, which produces an extra value. Otherwise the
    // transformation is not equivalent, and the downstream logic to replace
    // uses gets things wrong.
    if (Load->getNumValues() > 2)
      return false;

    // If the load that we're shrinking is an extload and we're not just
    // discarding the extension we can't simply shrink the load. Bail.
    // TODO: It would be possible to merge the extensions in some cases.
    if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
        Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
      return false;
  } else {
    assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
    StoreSDNode *Store = cast<StoreSDNode>(LDST);
    // Can't write outside the original store
    if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    if (LegalOperations &&
        !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
      return false;
  }
  return true;
}
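
/// Recursively walk the operands below an AND with a constant mask, collecting
/// loads that can be narrowed to the mask width in \p Loads and or/xor nodes
/// whose constants have bits outside the mask in \p NodesWithConsts. At most
/// one other node is tolerated and returned via \p NodeToMask; it must be
/// re-masked explicitly before the AND can be removed. Returns false if any
/// operand rules out the backwards mask propagation.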
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (SDValue Op : N->op_values()) {
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}

bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallVector<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      if (And.getOpcode() == ISD::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);

      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      if (And.getOpcode() == ISD::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}

// Unfold
//    x &  (-1 'logical shift' y)
// To
//    (x 'opposite logical shift' y) 'logical shift' y
// if it is better for performance.
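//
// For instance, with i8 and y == 3 the mask is -1 << 3 == 0b11111000, so
// x & mask equals (x >> 3) << 3: both clear the low three bits. The variable
// shift pair can be cheaper than materializing the variable mask on targets
// where shouldFoldMaskToVariableShiftPair says so.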
SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
  assert(N->getOpcode() == ISD::AND);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Do we actually prefer shifts over mask?
  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
    return SDValue();

  // Try to match  (-1 '[outer] logical shift' y)
  unsigned OuterShift;
  unsigned InnerShift; // The opposite direction to the OuterShift.
  SDValue Y;           // Shift amount.
  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
    if (!M.hasOneUse())
      return false;
    OuterShift = M->getOpcode();
    if (OuterShift == ISD::SHL)
      InnerShift = ISD::SRL;
    else if (OuterShift == ISD::SRL)
      InnerShift = ISD::SHL;
    else
      return false;
    if (!isAllOnesConstant(M->getOperand(0)))
      return false;
    Y = M->getOperand(1);
    return true;
  };

  SDValue X;
  if (matchMask(N1))
    X = N0;
  else if (matchMask(N0))
    X = N1;
  else
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  //     tmp = x 'opposite logical shift' y
  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
  //     ret = tmp 'logical shift' y
  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);

  return T1;
}

/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
/// For a target with a bit test, this is expected to become test + set and save
/// at least 1 instruction.
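/// For example, and (not (srl X, 4)), 1 asks whether bit 4 of X is clear; it
/// is rewritten below as (and X, 16) == 0, which isel can often turn into a
/// single bit-test instruction feeding a set.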
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
  assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");

  // This is probably not worthwhile without a supported type.
  EVT VT = And->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // Look through an optional extension and find a 'not'.
  // TODO: Should we favor test+set even without the 'not' op?
  SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
  if (Not.getOpcode() == ISD::ANY_EXTEND)
    Not = Not.getOperand(0);
  if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
    return SDValue();

  // Look though an optional truncation. The source operand may not be the same
  // type as the original 'and', but that is ok because we are masking off
  // everything but the low bit.
  SDValue Srl = Not.getOperand(0);
  if (Srl.getOpcode() == ISD::TRUNCATE)
    Srl = Srl.getOperand(0);

  // Match a shift-right by constant.
  if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
      !isa<ConstantSDNode>(Srl.getOperand(1)))
    return SDValue();

  // We might have looked through casts that make this transform invalid.
  // TODO: If the source type is wider than the result type, do the mask and
  //       compare in the source type.
  const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
  unsigned VTBitWidth = VT.getSizeInBits();
  if (ShiftAmt.uge(VTBitWidth))
    return SDValue();

  // Turn this into a bit-test pattern using mask op + setcc:
  // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
  SDLoc DL(And);
  SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Mask = DAG.getConstant(
      APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
  return DAG.getZExtOrTrunc(Setcc, DL, VT);
}

SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);

  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;

  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;

  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things. This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (EltBitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
               SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong
        // value.
        if ((SplatBitSize % EltBitWidth) == 0) {
          Constant = APInt::getAllOnesValue(EltBitWidth);
          for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
            Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
      AddToWorklist(N);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
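  //
  // To see why: zext(bool) is 0 or 1, so (sub 0, zext(bool)) is 0 or -1 (all
  // ones), and masking with 1 recovers 0 or 1 again. Likewise sext(bool) is
  // 0 or -1, so its negation is already 0 or 1, which equals zext(bool X).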
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
      (ISD::isEXTLoad(N0.getNode()) ||
       (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned ExtBitSize = N1.getScalarValueSizeInBits();
    unsigned MemBitSize = MemVT.getScalarSizeInBits();
    APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
    if (DAG.MaskedValueIsZero(N1, ExtBits) &&
        ((!LegalOperations && LN0->isSimple()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
                         LN0->getBasePtr(), MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  if (TLI.hasBitTest(N0, N1))
    if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
      return V;

  return SDValue();
}

/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
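/// As a 32-bit example, with a = 0xAABBCCDD the masked halves are
/// (a >> 8) & 0xFF == 0xCC and (a << 8) & 0xFF00 == 0xDD00, so the or is
/// 0xDDCC; bswap(a) == 0xDDCCBBAA shifted right by 16 gives the same 0xDDCC.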
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the
  // SRL 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}

/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  SDValue N0 = N.getOperand(0);
  unsigned Opc0 = N0.getOpcode();
  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    return false;

  ConstantSDNode *N1C = nullptr;
  // SHL or SRL: look upstream for AND mask operand
  if (Opc == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  else if (Opc0 == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!N1C)
    return false;

  unsigned MaskByteOffset;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       MaskByteOffset = 0; break;
  case 0xFF00:     MaskByteOffset = 1; break;
  case 0xFFFF:
    // In case demanded bits didn't clear the bits that will be shifted out.
    // This is needed for X86.
    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
      MaskByteOffset = 1;
      break;
    }
    return false;
  case 0xFF0000:   MaskByteOffset = 2; break;
  case 0xFF000000: MaskByteOffset = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  if (Opc == ISD::AND) {
    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (Opc0 != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (Opc0 != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (MaskByteOffset != 0 && MaskByteOffset != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (MaskByteOffset != 1 && MaskByteOffset != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  if (Parts[MaskByteOffset])
    return false;

  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
  return true;
}

/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
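/// Checking the identity on x = 0xAABBCCDD: the four masked terms are
/// 0x0000DD00, 0x000000CC, 0xBB000000 and 0x00AA0000, which or together to
/// 0xBBAADDCC; bswap gives 0xDDCCBBAA and rotating that left by 16 is also
/// 0xBBAADDCC.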
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);

  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();

    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (or x, undef) -> -1
  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
    return DAG.getAllOnesConstant(DL, VT);

  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
    return V;

  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
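  // For example, (or (and X, 0xFF00), (and Y, 0x00FF)) becomes
  // (and (or X, Y), 0xFFFF) provided the low eight bits of X and bits 8..15
  // of Y are already known zero, so neither AND was actually discarding bits
  // that the other side would now contribute.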
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        // We can only do this xform if we know that bits from X that are set in
        // C2 but not in C1 are already zero. Likewise for Y.
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
  }

  return SDValue();
}

/// OR combines for which the commuted variant will be tried as well.
static SDValue visitORCommutative(
    SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == ISD::AND) {
    // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
    if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);

    // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
    if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
  }

  return SDValue();
}

SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the other side is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          SDValue LegalShuffle =
              TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
                                          Mask, DAG);
          if (LegalShuffle)
            return LegalShuffle;
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);

  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 or c1/c2 are undef.
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
    return Combined;
  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
    return Combined;

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If OR can be rewritten into ADD, try combines based on ADD.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    if (SDValue Combined = visitADDLike(N))
      return Combined;

  return SDValue();
}

static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
  if (Op.getOpcode() == ISD::AND &&
      DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
    Mask = Op.getOperand(1);
    return Op.getOperand(0);
  }
  return Op;
}

/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
                            SDValue &Mask) {
  Op = stripConstantMask(DAG, Op, Mask);
  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }
  return false;
}

/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv. This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (add v v) (shrl v bitwidth-1)):
///     expands (add v v) -> (shl v 1)
///
///   (or (mul v c0) (shrl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
///   (or (shl v c0) (shrl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (shrl v c0) (shl (shrl v c1) c2)):
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
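///
/// A worked instance of the mul pattern on i32: (or (mul v, 48),
/// (srl (mul v, 3), 28)) has c0 = 48, c1 = 3, c2 = 28, so c3 = 32 - 28 = 4
/// and 48 == 3 << 4; the mul side expands to (shl (mul v, 3), 4), exposing
/// (rotl (mul v, 3), 4) to the rotate matcher.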
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);

  // Value and Type of the shift.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));

  // (add v v) -> (shl v 1)
  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
      ExtractFrom.getOpcode() == ISD::ADD &&
      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
      ExtractFrom.getOperand(0) == OppShiftLHS &&
      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));

  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
  // Check that we have constant values.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try to extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //     c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}

// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
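//
// Illustrative example (not from the original source): the common C rotate
// idiom
//   (x << r) | (x >> ((32 - r) & 31))
// yields Pos == r and Neg == (and (sub 32, r), 31). With EltSize == 32 the
// AND is stripped (MaskLoBits == 5), leaving NegC == 32 and NegOp1 == r ==
// Pos, so Width == 32 and Width.getLoBits(5) == 0, which proves the pair
// forms a rotate.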
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                      [B]
  //
  // for all Neg and Pos. Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time. E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
      unsigned Bits = Log2_64(EltSize);
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and
  // NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'. The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //     EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //     NegC & Mask == (EltSize - PosC) & Mask
  //     EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}

// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, unsigned PosOpcode,
                                       unsigned NegOpcode, const SDLoc &DL) {
  // fold (or (shl x, (*ext y)),
  //          (srl x, (*ext (sub 32, y)))) ->
  //   (rotl x, y) or (rotr x, (sub 32, y))
  //
  // fold (or (shl x, (*ext (sub 32, y))),
  //          (srl x, (*ext y))) ->
  //   (rotr x, y) or (rotl x, (sub 32, y))
  EVT VT = Shifted.getValueType();
  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                       HasPos ? Pos : Neg).getNode();
  }
  return nullptr;
}

// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions,
// generate a rot[lr].
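// For instance (an illustrative i32 case, not from the original source),
// the constant-amount idiom
//   (or (shl x, 3), (srl x, 29)) -> (rotl x, 3)
// is matched directly below since 3 + 29 == 32, while the variable-amount
// form (or (shl x, y), (srl x, (sub 32, y))) is handled via
// MatchRotatePosNeg above.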
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type. Expanded and promoted types won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Check for truncated rotate.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
                         SDValue(Rot, 0)).getNode();
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail.
  if (!LHSShift && !RHSShift)
    return nullptr;

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract. We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return nullptr;

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr; // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr; // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
namespace {

/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
  // For constant zero providers Load is set to nullptr. For memory providers
  // Load represents the node which loads the byte from memory.
  // ByteOffset is the offset of the byte in the value produced by the load.
  LoadSDNode *Load = nullptr;
  unsigned ByteOffset = 0;

  ByteProvider() = default;

  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    return ByteProvider(Load, ByteOffset);
  }

  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }

  bool isConstantZero() const { return !Load; }
  bool isMemory() const { return Load; }

  bool operator==(const ByteProvider &Other) const {
    return Other.Load == Load && Other.ByteOffset == ByteOffset;
  }

private:
  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
      : Load(Load), ByteOffset(ByteOffset) {}
};

} // end anonymous namespace

/// Recursively traverses the expression calculating the origin of the
/// requested byte of the given value. Returns None if the provider can't be
/// calculated.
///
/// For all values except the root of the expression, verifies that the value
/// has exactly one use; if that is not the case, returns None. This way, if
/// the origin of the byte is returned, it is guaranteed that the values which
/// contribute to the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the
/// same node more than once.
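///
/// Illustrative example (not from the original source): for the i16 value
///   (or (zero_extend (load p)), (shl (zero_extend (load p+1)), 8))
/// byte 0 resolves through the OR's left operand to getMemory(load p, 0),
/// while byte 1 is constant zero on the left (past the zext's narrow width)
/// and resolves through the SHL (ByteShift == 1) to getMemory(load p+1, 0).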
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to a depth of 8 calls.
  if (Depth == 10)
    return None;

  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (!L->isSimple() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}

static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
  return i;
}

static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
  return BW - i - 1;
}

// Check if the byte offsets we are looking at match with either a big or
// little endian value load. Return true for big endian, false for little
// endian, and None if the match failed.
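// For example (illustrative values, not from the original source): with
// Width == 4 and FirstOffset == 8, ByteOffsets {8, 9, 10, 11} is little
// endian (returns false), {11, 10, 9, 8} is big endian (returns true), and
// {9, 8, 11, 10} matches neither and yields None.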
static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
                                  int64_t FirstOffset) {
  // The endianness can only be decided when there are at least 2 bytes.
  unsigned Width = ByteOffsets.size();
  if (Width < 2)
    return None;

  bool BigEndian = true, LittleEndian = true;
  for (unsigned i = 0; i < Width; i++) {
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
    BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
    if (!BigEndian && !LittleEndian)
      return None;
  }

  assert((BigEndian != LittleEndian) && "It should be either big endian or "
                                        "little endian");
  return BigEndian;
}

static SDValue stripTruncAndExt(SDValue Value) {
  switch (Value.getOpcode()) {
  case ISD::TRUNCATE:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
    return stripTruncAndExt(Value.getOperand(0));
  }
  return Value;
}

/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
///
/// Assuming little endian target:
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 0) & 0xFF;
///  p[1] = (val >> 8) & 0xFF;
///  p[2] = (val >> 16) & 0xFF;
///  p[3] = (val >> 24) & 0xFF;
/// =>
///  *((i32)p) = val;
///
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 24) & 0xFF;
///  p[1] = (val >> 16) & 0xFF;
///  p[2] = (val >> 8) & 0xFF;
///  p[3] = (val >> 0) & 0xFF;
/// =>
///  *((i32)p) = BSWAP(val);
SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
  // Collect all the stores in the chain.
  SDValue Chain;
  SmallVector<StoreSDNode *, 8> Stores;
  for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
    // TODO: Allow unordered atomics when wider type is legal (see D66309)
    if (Store->getMemoryVT() != MVT::i8 ||
        !Store->isSimple() || Store->isIndexed())
      return SDValue();
    Stores.push_back(Store);
    Chain = Store->getChain();
  }
  // Handle simple types only.
  unsigned Width = Stores.size();
  EVT VT = EVT::getIntegerVT(
      *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
    return SDValue();

  // Check if all the bytes of the combined value we are looking at are stored
  // to the same base address. Collect byte offsets from the base address into
  // ByteOffsets.
  SDValue CombinedValue;
  SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
  int64_t FirstOffset = INT64_MAX;
  StoreSDNode *FirstStore = nullptr;
  Optional<BaseIndexOffset> Base;
  for (auto Store : Stores) {
    // All the stores store different bytes of the CombinedValue. A truncate is
    // required to get each byte value.
    SDValue Trunc = Store->getValue();
    if (Trunc.getOpcode() != ISD::TRUNCATE)
      return SDValue();
    // A shift operation is required to get the right byte offset, except for
    // the first byte.
    int64_t Offset = 0;
    SDValue Value = Trunc.getOperand(0);
    if (Value.getOpcode() == ISD::SRL ||
        Value.getOpcode() == ISD::SRA) {
      ConstantSDNode *ShiftOffset =
          dyn_cast<ConstantSDNode>(Value.getOperand(1));
      // Trying to match the following pattern. The shift offset must be
      // a constant and a multiple of 8. It is the byte offset in "y".
      //
      // x = srl y, offset
      // i8 z = trunc x
      // store z, ...
      if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
        return SDValue();

      Offset = ShiftOffset->getSExtValue() / 8;
      Value = Value.getOperand(0);
    }

    // Stores must share the same combined value with different offsets.
    if (!CombinedValue)
      CombinedValue = Value;
    else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
      return SDValue();
    // The trunc and all the extend operations must be stripped to get the
    // real value being stored.
    else if (CombinedValue.getValueType() != VT) {
      if (Value.getValueType() == VT ||
          Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
        CombinedValue = Value;
      // Give up if the combined value type is smaller than the store size.
      if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
        return SDValue();
    }

    // Stores must share the same base address.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Remember the first byte store.
    if (ByteOffsetFromBase < FirstOffset) {
      FirstStore = Store;
      FirstOffset = ByteOffsetFromBase;
    }
    // Map the offset in the store and the offset in the combined value, and
    // early return if it has been set before.
    if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
      return SDValue();
    ByteOffsets[Offset] = ByteOffsetFromBase;
  }

  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
  assert(FirstStore && "First store must be set");

  // Check if the bytes of the combined value we are looking at match with
  // either a big or little endian value store.
  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  // The node we are looking at matches the pattern; check if we can replace
  // it with a single bswap (if needed) and a store.

  // If the store needs a byte swap, check if the target supports it.
  bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // store and byte shuffling instead of several stores and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a store of the wide type is both allowed and fast on the
  // target.
  bool Fast = false;
  bool Allowed =
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                             *FirstStore->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  if (VT != CombinedValue.getValueType()) {
    assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
           "Get unexpected store value to combine");
    CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
                                CombinedValue);
  }

  if (NeedsBswap)
    CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);

  SDValue NewStore =
      DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
                   FirstStore->getPointerInfo(), FirstStore->getAlignment());

  // Rely on other DAG combine rules to remove the other individual stores.
  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
  return NewStore;
}
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the
/// address of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///         t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Before legalize we can introduce too-wide illegal loads which will be
  // later split into legal sized loads. This enables us to combine i64 load
  // by i8 patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
    return SDValue();

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes, not bits");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };

  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the
  // same base address. Collect byte offsets from the base address in
  // ByteOffsets.
  SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
  for (unsigned i = 0; i < ByteWidth; i++) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P || !P->isMemory()) // All the bytes must be loaded from memory.
      return SDValue();

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
           !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain.
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address.
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address.
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load.
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  // Check if the bytes of the OR we are looking at match with either a big or
  // little endian value load.
  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load,
  // so that the combined value can be loaded from the first load's address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches the pattern; check if we can replace
  // it with a single load and bswap if needed.

  // If the load needs a byte swap, check if the target supports it.
  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target.
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, *FirstLoad->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
                  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}
// If the target has andn, bsl, or a similar bit-select instruction,
// we want to unfold masked merge, with canonical pattern of:
//   |        A        |  |B|
//   ((x ^ y) & m) ^ y
//    |  D  |
// Into:
//   (x & m) | (y & ~m)
// If y is a constant, and the 'andn' does not work with immediates,
// we unfold into a different pattern:
//   ~(~x & m) & (m | y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
// the very least that breaks andnpd / andnps patterns, and because those
// patterns are simplified in IR and shouldn't be created in the DAG.
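//
// Quick worked example (illustrative values, not from the original source):
// with x = 0b1100, y = 0b1010, m = 0b0110:
//   ((x ^ y) & m) ^ y   ==  (0b0110 & 0b0110) ^ 0b1010  ==  0b1100
//   (x & m) | (y & ~m)  ==  0b0100 | 0b1000             ==  0b1100
// i.e. both forms take x's bits where m is set and y's bits elsewhere.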
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
  assert(N->getOpcode() == ISD::XOR);

  // Don't touch 'not' (i.e. where y = -1).
  if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // There are 3 commutable operators in the pattern,
  // so we have to deal with 8 possible variants of the basic pattern.
  SDValue X, Y, M;
  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
      return false;
    SDValue Xor = And.getOperand(XorIdx);
    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
      return false;
    SDValue Xor0 = Xor.getOperand(0);
    SDValue Xor1 = Xor.getOperand(1);
    // Don't touch 'not' (i.e. where y = -1).
    if (isAllOnesOrAllOnesSplat(Xor1))
      return false;
    if (Other == Xor0)
      std::swap(Xor0, Xor1);
    if (Other != Xor1)
      return false;
    X = Xor0;
    Y = Xor1;
    M = And.getOperand(XorIdx ? 0 : 1);
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
      !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
    return SDValue();

  // Don't do anything if the mask is constant. This should not be reachable:
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
  if (isa<ConstantSDNode>(M.getNode()))
    return SDValue();

  // We can transform if the target has AndNot.
  if (!TLI.hasAndNot(M))
    return SDValue();

  SDLoc DL(N);

  // If Y is a constant, check that 'andn' works with immediates.
  if (!TLI.hasAndNot(Y)) {
    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    // If not, we need to do a bit more work to make sure andn is still used.
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
    SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
    return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
  }

  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
  SDValue NotM = DAG.getNOT(DL, M, VT);
  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}

SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  SDLoc DL(N);
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  unsigned N0Opcode = N0.getOpcode();
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               LHS.getValueType().isInteger());
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0Opcode) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)) {
    SDValue V = N0.getOperand(0);
    SDLoc DL0(N0);
    V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
                    DAG.getConstant(1, DL0, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
    if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
    }
  }

  // fold (not (neg x)) -> (add X, -1)
  // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
  // Y is a constant or the subtract has a single use.
  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
                       DAG.getAllOnesConstant(DL, VT));
  }

  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
    SDValue X = N0.getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
  }

  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
    ConstantSDNode *XorC = isConstOrConstSplat(N1);
    ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
    unsigned BitWidth = VT.getScalarSizeInBits();
    if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We cannot guarantee that a bogus
      // shift has been simplified to undef.
      uint64_t ShiftAmt = ShiftC->getLimitedValue();
      if (ShiftAmt < BitWidth) {
        APInt Ones = APInt::getAllOnesValue(BitWidth);
        Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
        if (XorC->getAPIntValue() == Ones) {
          // If the xor constant is a shifted -1, do a 'not' before the shift:
          // xor (X << ShiftC), XorC --> (not X) << ShiftC
          // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
          SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
          return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
        }
      }
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
    SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, DL, VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x == 14
  // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all
  //   ones.
  // - There exists no value for x which would allow the result to contain
  //   zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  //   A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
  if (N0Opcode == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable.
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// If we have a shift-by-constant of a bitwise logic op that itself has a
/// shift-by-constant operand with identical opcode, we may be able to convert
/// that into 2 independent shifts followed by the logic op. This is a
/// throughput improvement.
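///
/// Illustrative i32 example (not from the original source):
///   shl (xor (shl X, 2), Y), 3 --> xor (shl X, 5), (shl Y, 3)
/// Both shift amounts are constants and their sum (5) stays below the
/// bitwidth, so the two new shifts can execute independently.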
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
  // Match a one-use bitwise logic op.
  SDValue LogicOp = Shift->getOperand(0);
  if (!LogicOp.hasOneUse())
    return SDValue();

  unsigned LogicOpcode = LogicOp.getOpcode();
  if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
      LogicOpcode != ISD::XOR)
    return SDValue();

  // Find a matching one-use shift by constant.
  unsigned ShiftOpcode = Shift->getOpcode();
  SDValue C1 = Shift->getOperand(1);
  ConstantSDNode *C1Node = isConstOrConstSplat(C1);
  assert(C1Node && "Expected a shift with constant operand");
  const APInt &C1Val = C1Node->getAPIntValue();
  auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
                             const APInt *&ShiftAmtVal) {
    if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
      return false;

    ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
    if (!ShiftCNode)
      return false;

    // Capture the shifted operand and shift amount value.
    ShiftOp = V.getOperand(0);
    ShiftAmtVal = &ShiftCNode->getAPIntValue();

    // Shift amount types do not have to match their operand type, so check
    // that the constants are the same width.
    if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
      return false;

    // The fold is not valid if the sum of the shift values exceeds bitwidth.
    if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
      return false;

    return true;
  };

  // Logic ops are commutative, so check each operand for a match.
  SDValue X, Y;
  const APInt *C0Val;
  if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
    Y = LogicOp.getOperand(1);
  else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
    Y = LogicOp.getOperand(0);
  else
    return SDValue();

  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
  SDLoc DL(Shift);
  EVT VT = Shift->getValueType(0);
  EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
  SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
  SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
  SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
  return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
}

/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// We are looking for: (shift being one of shl/sra/srl)
///   shift (binop X, C0), C1
/// And want to transform into:
///   binop (shift X, C1), (shift C0, C1)
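/// For instance (an illustrative i32 case, not from the original source),
/// assuming the restrictions on the binop's LHS checked below are met:
///   shl (or X, 5), 2 --> or (shl X, 2), 20
/// since (5 << 2) == 20 and shl distributes over the bitwise logic ops; for
/// add, the transform is only valid with shl (modulo-2^n arithmetic), which
/// is why the sr[al](add) cases are rejected below.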
  6412. SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
  6413. assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
  6414. // Do not turn a 'not' into a regular xor.
  6415. if (isBitwiseNot(N->getOperand(0)))
  6416. return SDValue();
  6417. // The inner binop must be one-use, since we want to replace it.
  6418. SDValue LHS = N->getOperand(0);
  6419. if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
  6420. return SDValue();
  6421. // TODO: This is limited to early combining because it may reveal regressions
  6422. // otherwise. But since we just checked a target hook to see if this is
  6423. // desirable, that should have filtered out cases where this interferes
  6424. // with some other pattern matching.
  6425. if (!LegalTypes)
  6426. if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
  6427. return R;
  6428. // We want to pull some binops through shifts, so that we have (and (shift))
  6429. // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
  6430. // thing happens with address calculations, so it's important to canonicalize
  6431. // it.
  6432. switch (LHS.getOpcode()) {
  6433. default:
  6434. return SDValue();
  6435. case ISD::OR:
  6436. case ISD::XOR:
  6437. case ISD::AND:
  6438. break;
  6439. case ISD::ADD:
  6440. if (N->getOpcode() != ISD::SHL)
  6441. return SDValue(); // only shl(add) not sr[al](add).
  6442. break;
  6443. }
  6444. // We require the RHS of the binop to be a constant and not opaque as well.
  6445. ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
  6446. if (!BinOpCst)
  6447. return SDValue();
  6448. // FIXME: disable this unless the input to the binop is a shift by a constant
  6449. // or is copy/select. Enable this in other cases when figure out it's exactly
  6450. // profitable.
  6451. SDValue BinOpLHSVal = LHS.getOperand(0);
  6452. bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
  6453. BinOpLHSVal.getOpcode() == ISD::SRA ||
  6454. BinOpLHSVal.getOpcode() == ISD::SRL) &&
  6455. isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
  6456. bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
  6457. BinOpLHSVal.getOpcode() == ISD::SELECT;
  6458. if (!IsShiftByConstant && !IsCopyOrSelect)
  6459. return SDValue();
  6460. if (IsCopyOrSelect && N->hasOneUse())
  6461. return SDValue();
  6462. // Fold the constants, shifting the binop RHS by the shift amount.
  6463. SDLoc DL(N);
  6464. EVT VT = N->getValueType(0);
  6465. SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
  6466. N->getOperand(1));
  6467. assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
  6468. SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
  6469. N->getOperand(1));
  6470. return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
  6471. }
  6472. SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  6473. assert(N->getOpcode() == ISD::TRUNCATE);
  6474. assert(N->getOperand(0).getOpcode() == ISD::AND);
  6475. // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  6476. EVT TruncVT = N->getValueType(0);
  6477. if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
  6478. TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
  6479. SDValue N01 = N->getOperand(0).getOperand(1);
  6480. if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
  6481. SDLoc DL(N);
  6482. SDValue N00 = N->getOperand(0).getOperand(0);
  6483. SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
  6484. SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
  6485. AddToWorklist(Trunc00.getNode());
  6486. AddToWorklist(Trunc01.getNode());
  6487. return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
  6488. }
  6489. }
  6490. return SDValue();
  6491. }
  6492. SDValue DAGCombiner::visitRotate(SDNode *N) {
  6493. SDLoc dl(N);
  6494. SDValue N0 = N->getOperand(0);
  6495. SDValue N1 = N->getOperand(1);
  6496. EVT VT = N->getValueType(0);
  6497. unsigned Bitsize = VT.getScalarSizeInBits();
  6498. // fold (rot x, 0) -> x
  6499. if (isNullOrNullSplat(N1))
  6500. return N0;
  6501. // fold (rot x, c) -> x iff (c % BitSize) == 0
  6502. if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
  6503. APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
  6504. if (DAG.MaskedValueIsZero(N1, ModuloMask))
  6505. return N0;
  6506. }
  6507. // fold (rot x, c) -> (rot x, c % BitSize)
  6508. // TODO - support non-uniform vector amounts.
  6509. if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
  6510. if (Cst->getAPIntValue().uge(Bitsize)) {
  6511. uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
  6512. return DAG.getNode(N->getOpcode(), dl, VT, N0,
  6513. DAG.getConstant(RotAmt, dl, N1.getValueType()));
  6514. }
  6515. }
  6516. // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  6517. if (N1.getOpcode() == ISD::TRUNCATE &&
  6518. N1.getOperand(0).getOpcode() == ISD::AND) {
  6519. if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
  6520. return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  6521. }
  6522. unsigned NextOp = N0.getOpcode();
  6523. // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  6524. if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
  6525. SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
  6526. SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
  6527. if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
  6528. EVT ShiftVT = C1->getValueType(0);
  6529. bool SameSide = (N->getOpcode() == NextOp);
  6530. unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
  6531. if (SDValue CombinedShift =
  6532. DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
  6533. SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
  6534. SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
  6535. ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
  6536. BitsizeC.getNode());
  6537. return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
  6538. CombinedShiftNorm);
  6539. }
  6540. }
  6541. }
  6542. return SDValue();
  6543. }
  6544. SDValue DAGCombiner::visitSHL(SDNode *N) {
  6545. SDValue N0 = N->getOperand(0);
  6546. SDValue N1 = N->getOperand(1);
  6547. if (SDValue V = DAG.simplifyShift(N0, N1))
  6548. return V;
  6549. EVT VT = N0.getValueType();
  6550. EVT ShiftVT = N1.getValueType();
  6551. unsigned OpSizeInBits = VT.getScalarSizeInBits();
  6552. // fold vector ops
  6553. if (VT.isVector()) {
  6554. if (SDValue FoldedVOp = SimplifyVBinOp(N))
  6555. return FoldedVOp;
  6556. BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
  6557. // If setcc produces all-one true value then:
  6558. // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  // TODO - support non-uniform vector shift amounts.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  // TODO - support non-uniform vector shift amounts.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
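  // For example, (shl (shl x, 3), 4) --> (shl x, 7), while on i8
  // (shl (shl x, 5), 6) --> 0 because 5 + 6 >= 8 shifts out every bit.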
  if (N0.getOpcode() == ISD::SHL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
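  // For example, with i16 zero-extended to i32:
  //   (shl (zext (shl x, 2)), 20) --> (shl (zext x), 22)
  // is safe because the outer shift amount (20) covers the 16 bits added by
  // the extension, so none of the bits shifted out of the inner shl survive.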
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::ANY_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);
    EVT InnerVT = N0Op0.getValueType();
    uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();

    auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                         ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
                                                      ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return c2.uge(OpSizeInBits - InnerBitwidth) &&
             (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
      SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
      Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
      return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
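  // For example, with i16 zero-extended to i32:
  //   (shl (zext (srl x, 3)), 3) --> (zext (shl (srl x, 3), 3))
  // i.e. perform the shift in the narrow type and extend afterwards.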
  if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    SDValue InnerShiftAmt = N0Op0.getOperand(1);

    auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2);
      return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
    };
    if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
                                  /*AllowUndefs*/ false,
                                  /*AllowTypeMismatch*/ true)) {
      SDLoc DL(N);
      EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
      SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
      NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
      AddToWorklist(NewSHL.getNode());
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
    }
  }
  // fold (shl (sr[la] exact X, C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 > C2
  // TODO - support non-uniform vector shift amounts.
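  // For example, (shl (srl exact x, 3), 5) --> (shl x, 2): the exact flag
  // guarantees no bits were shifted out, so the two shifts partially cancel.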
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, ShiftVT));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, ShiftVT));
    }
  }
  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  // TODO - drop hasOneUse requirement if c1 == c2?
  // TODO - support non-uniform vector shift amounts.
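  // For example, on i8: (shl (srl x, 4), 2) --> (and (srl x, 2), 0x3C),
  // which keeps the original bits [7:4] of x repositioned at bits [5:2].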
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
        uint64_t c1 = N0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, ShiftVT));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, ShiftVT));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
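  // For example, on i8 with c1 == 3: (shl (sra x, 3), 3) --> (and x, 0xF8),
  // i.e. both forms just clear the low three bits.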
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // This is a variant of the fold done on multiply, except that a mul by a
  // power of 2 is turned into a shift.
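  // For example, (shl (add x, 3), 2) --> (add (shl x, 2), 12), exposing the
  // shifted constant 3 << 2 for further combining.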
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N))
      return NewSHL;

  return SDValue();
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  // fold (sra c1, c2) -> c1 >> c2 (arithmetic)
  // TODO - support non-uniform vector shift amounts.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, if the target
  // supports sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
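  // For example, on i32: (sra (sra x, 25), 10) --> (sra x, 31), since the
  // combined amount 35 saturates at OpSizeInBits - 1.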
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }
  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target, sext(shl) is likely to result in
  // better code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated-to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
  //   sra (add (shl X, N1C), AddC), N1C -->
  //   sext (add (trunc X to (width - N1C)), AddC')
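  // For example, on i32 with N1C == 16 and AddC == 0x30000:
  //   sra (add (shl X, 16), 0x30000), 16 --> sext (add (trunc X to i16), 3)
  // where AddC' is AddC logically shifted right by N1C and truncated.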
  if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
      N0.getOperand(0).getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
      SDValue Shl = N0.getOperand(0);
      // Determine what the truncate's type would be and ask the target if
      // that is a free operation.
      LLVMContext &Ctx = *DAG.getContext();
      unsigned ShiftAmt = N1C->getZExtValue();
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // TODO: The simple type check probably belongs in the default hook
      //       implementation and/or target-specific overrides (because
      //       non-simple types likely require masking when legalized), but
      //       that restriction may conflict with other transforms.
      if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
        return DAG.getSExtOrTrunc(Add, DL, VT);
      }
    }
  }
  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  // TODO - support non-uniform vector shift amounts.
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      EVT LargeVT = N0Op0.getValueType();
      unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
      if (LargeShift->getAPIntValue() == TruncBits) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
                                      getShiftAmountTy(LargeVT));
        SDValue SRA =
            DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  // TODO - support non-uniform vector shift amounts.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N))
      return NewSRA;

  return SDValue();
}
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  // TODO - support non-uniform vector shift amounts.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  // TODO - support non-uniform vector shift amounts.
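  // For example, truncating i64 to i32:
  //   (srl (trunc (srl x, 32)), 5) --> (trunc (srl x, 37))
  // This is valid because the inner shift amount (32) exactly matches the
  // number of bits removed by the truncate.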
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // TODO - (srl (shl x, c1), c2).
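  // For example, on i8: (srl (shl x, 3), 3) --> (and x, 0x1F), where the mask
  // is computed below as -1 >>u 3.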
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  // TODO - support non-uniform vector shift amounts.
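  // For example, any-extending i16 to i32:
  //   (srl (anyext x), 4) --> (and (anyext (srl x, 4)), 0x0FFFFFFF)
  // The mask clears the high bits that the any_extend leaves undefined.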
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getAPIntValue().uge(BitSize))
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                                       DAG.getConstant(ShiftAmt, DL0,
                                           getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits could
      // be set on input to the CTLZ node.  If this bit is set, the SRL will
      // return 0; if it is clear, it returns 1.  Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                         DAG.getConstant(ShAmt, DL,
                                         getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  // TODO - support non-uniform vector shift amounts.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However, after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further.  Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  bool IsFSHL = N->getOpcode() == ISD::FSHL;
  unsigned BitWidth = VT.getScalarSizeInBits();

  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
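  // (Recall the semantics: fshl concatenates N0:N1, shifts left by the amount
  // modulo the bitwidth, and returns the high half, so a zero amount returns
  // N0 unchanged; fshr returns the low half, so a zero amount returns N1.)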
  if (isPowerOf2_32(BitWidth))
    if (DAG.MaskedValueIsZero(
            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
      return IsFSHL ? N0 : N1;

  auto IsUndefOrZero = [](SDValue V) {
    return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
  };

  // TODO - support non-uniform vector shift amounts.
  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
    EVT ShAmtTy = N2.getValueType();

    // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
    if (Cst->getAPIntValue().uge(BitWidth)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
                         DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
    }

    unsigned ShAmt = Cst->getZExtValue();
    if (ShAmt == 0)
      return IsFSHL ? N0 : N1;

    // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
    // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
    // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
    // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
    if (IsUndefOrZero(N0))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
                         DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
                                         SDLoc(N), ShAmtTy));
    if (IsUndefOrZero(N1))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                         DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
                                         SDLoc(N), ShAmtTy));
  }
  // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
  // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
  // TODO: when is it worth doing SUB(BW, N2) as well?
  if (isPowerOf2_32(BitWidth)) {
    APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
    if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
    if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
  }
  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal; if the funnel
  // shift is legal as well, we might be better off avoiding the non-constant
  // (BW - N2).
  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
  if (N0 == N1 && hasOperation(RotOpc, VT))
    return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);

  // Simplify, based on bits shifted out of N0/N1.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
SDValue DAGCombiner::visitABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (abs c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
  // fold (abs (abs x)) -> (abs x)
  if (N0.getOpcode() == ISD::ABS)
    return N0;
  // fold (abs x) -> x iff not-negative
  if (DAG.SignBitIsZero(N0))
    return N0;
  return SDValue();
}

SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bswap c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
  // fold (bswap (bswap x)) -> x
  if (N0.getOpcode() == ISD::BSWAP)
    return N0->getOperand(0);
  return SDValue();
}

SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bitreverse c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
  // fold (bitreverse (bitreverse x)) -> x
  if (N0.getOpcode() == ISD::BITREVERSE)
    return N0.getOperand(0);
  return SDValue();
}

SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);

  // If the value is known never to be zero, switch to the undef version.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  }

  return SDValue();
}

SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);

  // If the value is known never to be zero, switch to the undef version.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  }

  return SDValue();
}

SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}
// FIXME: This should be checking for no signed zeros on individual operands,
// as well as no nans.
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
                                         SDValue RHS,
                                         const TargetLowering &TLI) {
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = LHS.getValueType();

  return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
         TLI.isProfitableToCombineMinNumMaxNum(VT) &&
         DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}

/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // Since the operands are known never to be NaN at this point, either
    // fminnum or fminnum_ieee is OK. Try the ieee version first, since
    // fminnum is expanded in terms of it.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  if (!VT.isInteger())
    return SDValue();

  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext).
  // There is also a target-independent combine here in DAGCombiner in the
  // other direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
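    // For example, select Cond, 7, 6 --> add (zext Cond), 6, and
    // select Cond, 5, 6 --> add (sext Cond), 6.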
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/ false, /*isFloat*/ true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/ false, /*isFloat*/ false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);
  SDNodeFlags Flags = N->getFlags();

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always transform
    // to the right-hand side if the inner select already exists in the DAG,
    // and we always transform to the left-hand side if we know that we can
    // further optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2, Flags);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
                                        Cond1, N1, N2, Flags);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect, Flags);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
                             N2, Flags);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2, Flags);
        }
      }
    }

    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
                             N2_2, Flags);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2, Flags);
      }
    }
  }
  // select (not Cond), N1, N2 -> select Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
    SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
    SelectOp->setFlags(Flags);
    return SelectOp;
  }

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
      // Any flags available in a select/setcc fold will be on the setcc as
      // they migrated from fcmp.
      Flags = N0.getNode()->getFlags();
      SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
                                       N2, N0.getOperand(2));
      SelectNode->setFlags(Flags);
      return SelectNode;
    }

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care
  // about binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();
  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF; then keep looping
  // until we get to half the length of the BV and check that all the non-undef
  // nodes are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Chain = MSC->getChain();
  SDLoc DL(N);

  // Zap scatters with a zero mask.
  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
    return Chain;

  return SDValue();
}

SDValue DAGCombiner::visitMSTORE(SDNode *N) {
  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
  SDValue Mask = MST->getMask();
  SDValue Chain = MST->getChain();
  SDLoc DL(N);

  // Zap masked stores with a zero mask.
  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
    return Chain;

  return SDValue();
}

SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDLoc DL(N);

  // Zap gathers with a zero mask.
  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
    return CombineTo(N, MGT->getPassThru(), MGT->getChain());

  return SDValue();
}

SDValue DAGCombiner::visitMLOAD(SDNode *N) {
  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
  SDValue Mask = MLD->getMask();
  SDLoc DL(N);

  // Zap masked loads with a zero mask.
  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
    return CombineTo(N, MLD->getPassThru(), MLD->getChain());

  return SDValue();
}
/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
      !TLI.convertSelectOfConstantsToMath(VT) ||
      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    if (N1Elt.isUndef() || N2Elt.isUndef())
      continue;

    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants
  // are all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
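    // For example, vselect Cond, <4, 5>, <3, 4> --> add (zext Cond), <3, 4>.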
    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}

SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
    return DAG.getSelect(DL, VT, F, N2, N1);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
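  // For example, for i8 X = -5: Y = -5 >>s 7 = -1, and (-5 + -1) ^ -1 = 5,
  // so the sra/add/xor sequence computes |X| without a branch or select.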
  7737. if (N0.getOpcode() == ISD::SETCC) {
  7738. SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
  7739. ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
  7740. bool isAbs = false;
  7741. bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
  7742. if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
  7743. (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
  7744. N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
  7745. isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
  7746. else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
  7747. N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
  7748. isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
  7749. if (isAbs) {
  7750. if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
  7751. return DAG.getNode(ISD::ABS, DL, VT, LHS);
  7752. SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
  7753. DAG.getConstant(VT.getScalarSizeInBits() - 1,
  7754. DL, getShiftAmountTy(VT)));
  7755. SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
  7756. AddToWorklist(Shift.getNode());
  7757. AddToWorklist(Add.getNode());
  7758. return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
  7759. }
  7760. // vselect x, y (fcmp lt x, y) -> fminnum x, y
  7761. // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
  7762. //
  7763. // This is OK if we don't care about what happens if either operand is a
  7764. // NaN.
  7765. //
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
      if (SDValue FMinMax =
              combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    //       requires thorough testing to avoid regressions.
    if (isNullOrNullSplat(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
        //   vselect (setcc extload(X), C'), N1, N2
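        // For example (illustrative types): a v4i16 load compared against
        // zero feeding a v4i32-element select becomes a v4i32 extending load
        // compared against zero, so the compare result already has the
        // select's width.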
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function assumes both the above
  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}

SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant.
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;  // cond always true -> true val
      else
        return N3;  // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // consistent with DAG creation: no setcc node is created in this case.
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc.
      SDValue SelectOp = DAG.getNode(
          ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
          SCC.getOperand(1), N2, N3, SCC.getOperand(2));
      SelectOp->setFlags(SCC->getFlags());
      return SelectOp;
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}

SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desirable
  // to keep the argument to a brcond as a setcc as much as possible.
  bool PreferSetCC =
      N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;

  SDValue Combined = SimplifySetCC(
      N->getValueType(0), N->getOperand(0), N->getOperand(1),
      cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);

  if (!Combined)
    return SDValue();

  // If we prefer to have a setcc, and we don't, we'll try our best to
  // recreate one using rebuildSetCC.
  if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
    SDValue NewSetCC = rebuildSetCC(Combined);

    // We don't have anything interesting to combine to.
    if (NewSetCC.getNode() == N)
      return SDValue();

    if (NewSetCC)
      return NewSetCC;
  }

  return Combined;
}

SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = N->getOperand(2);
  SDValue Cond = N->getOperand(3);

  // If Carry is false, fold to a regular SETCC.
  if (isNullConstant(Carry))
    return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);

  return SDValue();
}

/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
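/// For example (illustrative): (zext (v2i8 build_vector <-1, 3>) to v2i32)
/// folds to (v2i32 build_vector <255, 3>), while the corresponding sign
/// extend would fold to <-1, 3>.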
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, DL, VT, N0);

  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  if (N0->getOpcode() == ISD::SELECT) {
    SDValue Op1 = N0->getOperand(1);
    SDValue Op2 = N0->getOperand(2);
    if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
        (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.:
      //
      // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
      // t2: i64 = any_extend t1
      // -->
      // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
      // -->
      // t4: i64 = sign_extend_inreg t3
      unsigned FoldOpc = Opcode;
      if (FoldOpc == ISD::ANY_EXTEND)
        FoldOpc = ISD::SIGN_EXTEND;
      return DAG.getSelect(DL, VT, N0->getOperand(0),
                           DAG.getNode(FoldOpc, DL, VT, Op1),
                           DAG.getNode(FoldOpc, DL, VT, Op2));
    }
  }

  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
        ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return SDValue();

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();

  // For zero-extensions, UNDEF elements are still guaranteed to have the
  // upper bits set to zero.
  bool IsZext =
      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;

  for (unsigned i = 0; i != NumElts; ++i) {
    SDValue Op = N0.getOperand(i);
    if (Op.isUndef()) {
      Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
      continue;
    }

    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts);
}

// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension is possible and the
// above-mentioned transformation is profitable.
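// For example (illustrative): if an i8 load feeds both this extend and a
// (setcc load, 7), the setcc can be rewritten against the extended value, so
// the narrow load disappears entirely. Such setcc users are collected in
// ExtendNodes and rewritten later by ExtendSetCCUses.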
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
                                    unsigned ExtOpc,
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                            UE = N0.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    if (UI.getUse().getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      bool Add = false;
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    bool BothLiveOut = false;
    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      return ExtendNodes.size();
  }
  return true;
}

void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                                  SDValue OrigLoad, SDValue ExtLoad,
                                  ISD::NodeType ExtType) {
  // Extend SetCC uses if necessary.
  SDLoc DL(ExtLoad);
  for (SDNode *SetCC : SetCCs) {
    SmallVector<SDValue, 4> Ops;

    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == OrigLoad)
        Ops.push_back(ExtLoad);
      else
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
}

// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 8)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //            (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                                   (v4i32 (sextload (x + 8)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || !LN0->isSimple() ||
      !DstVT.isVector() || !DstVT.isPow2VectorType() ||
      !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
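
  // For the v8i16 -> v8i32 example above: SplitDstVT = v4i32 and
  // SplitSrcVT = v4i16, so NumSplits = 8 / 4 = 2 and Stride = 8 bytes,
  // producing loads from x and x + 8.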
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0);  // Return N so it doesn't get rechecked!
}

// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
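// For example (illustrative types), zext i16 -> i32 of
//   (and (srl (i16 load x), 4), 0xff)
// becomes (and (srl (i32 zextload x), 4), 0xff): the zextload fills the high
// bits with zeros, so shifting and masking in the wider type gives the same
// result. The SHL case is only safe together with AND (checked below),
// because a wider shl keeps bits that the narrow shl would have discarded.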
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);
  EVT OrigVT = N->getOperand(0).getValueType();
  if (TLI.isZExtFree(OrigVT, VT))
    return SDValue();

  // and/or/xor
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();

  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  if (SDValue(Load, 0).hasOneUse()) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }

  // N0 is dead at this point.
  recursivelyDeleteUnusedNodes(N0.getNode());

  return SDValue(N, 0);  // Return N so it doesn't get rechecked!
}

/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
/// sizes for a select condition and other operands should be more efficient.
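/// For example (illustrative types): with a v4i32 setcc feeding a vselect of
/// v4i64 values that is then truncated to v4i32, truncating A and B first
/// lets the whole select run at the compare's width:
///   trunc (vselect (setcc X, Y), A, B)
///     --> vselect (setcc X, Y), (trunc A), (trunc B)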
SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
  unsigned CastOpcode = Cast->getOpcode();
  assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
          CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
          CastOpcode == ISD::FP_ROUND) &&
         "Unexpected opcode for vector select narrowing/widening");

  // We only do this transform before legal ops because the pattern may be
  // obfuscated by target-specific operations after legalization. Do not create
  // an illegal select op, however, because that may be difficult to lower.
  EVT VT = Cast->getValueType(0);
  if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  SDValue VSel = Cast->getOperand(0);
  if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
      VSel.getOperand(0).getOpcode() != ISD::SETCC)
    return SDValue();

  // Does the setcc have the same vector size as the casted select?
  SDValue SetCC = VSel.getOperand(0);
  EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
  if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
    return SDValue();

  // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
  SDValue A = VSel.getOperand(1);
  SDValue B = VSel.getOperand(2);
  SDValue CastA, CastB;
  SDLoc DL(Cast);
  if (CastOpcode == ISD::FP_ROUND) {
    // FP_ROUND (fptrunc) has an extra flag operand to pass along.
    CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
    CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
  } else {
    CastA = DAG.getNode(CastOpcode, DL, VT, A);
    CastB = DAG.getNode(CastOpcode, DL, VT, B);
  }
  return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
}

// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
                                     const TargetLowering &TLI, EVT VT,
                                     bool LegalOperations, SDNode *N,
                                     SDValue N0, ISD::LoadExtType ExtLoadType) {
  SDNode *N0Node = N0.getNode();
  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
                                                   : ISD::isZEXTLoad(N0Node);
  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
      !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  EVT MemVT = LN0->getMemoryVT();
  if ((LegalOperations || !LN0->isSimple() ||
       VT.isVector()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
    return SDValue();

  SDValue ExtLoad =
      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                     LN0->getBasePtr(), MemVT, LN0->getMemOperand());
  Combiner.CombineTo(N, ExtLoad);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  if (LN0->use_empty())
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  return SDValue(N, 0);  // Return N so it doesn't get rechecked!
}

// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target.
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc) {
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
      !ISD::isUNINDEXEDLoad(N0.getNode()) ||
      ((LegalOperations || VT.isVector() ||
        !cast<LoadSDNode>(N0)->isSimple()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    return {};

  bool DoXform = true;
  SmallVector<SDNode *, 4> SetCCs;
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  } else {
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0);  // Return N so it doesn't get rechecked!
}

static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
                                       bool LegalOperations) {
  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");

  SDValue SetCC = N->getOperand(0);
  if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
      !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
    return SDValue();

  SDValue X = SetCC.getOperand(0);
  SDValue Ones = SetCC.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
  EVT VT = N->getValueType(0);
  EVT XVT = X.getValueType();
  // setge X, C is canonicalized to setgt, so we do not need to match that
  // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
  // not require the 'not' op.
  if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
    // Invert and smear/shift the sign bit:
    // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
    // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
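    // For example (illustrative, N = 32): for X = 5, (~X) = -6, and
    // -6 >> 31 gives all-ones with sra (sext of true) or 1 with srl (zext of
    // true); for X = -3, (~X) = 2 is non-negative, so either shift gives 0.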
    SDLoc DL(N);
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
    auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
    return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
  }
  return SDValue();
}

SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);  // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended. If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits = Op.getScalarValueSizeInBits();
    unsigned MidBits = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
      // bits, the trunc/sext pair is a no-op and Op can be used directly.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N, 0);  // Return N so it doesn't get rechecked!
      }
    }
  }

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // If we already have the desired type, don't change it.
      if (SVT != N0.getValueType()) {
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter). Check to see that they are the same size. If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        if (VT.getSizeInBits() == SVT.getSizeInBits())
          return DAG.getSetCC(DL, VT, N00, N01, CC);

        // If the desired elements are smaller or larger than the source
        // elements, we can use a matching integer vector type and then
        // truncate/sign extend.
        EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
        if (SVT == MatchingVecType) {
          SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
          return DAG.getSExtOrTrunc(VsetCC, DL, VT);
        }
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Eliminate this sign extend by doing a negation in the destination type:
  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
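  // This is valid because 0 - (zext i8 X to i32) lies in [-255, 0], a range
  // that is already sign extended in i32, so negating the wider zext
  // produces the same value.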
  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
      isNullOrNullSplat(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
  }

  // Eliminate this sign extend by doing a decrement in the destination type:
  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
  }

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero/one in Known.
// This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         KnownBits &Known) {
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    Known = DAG.computeKnownBits(Op);
    return true;
  }
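
  // A (setcc ne X, 0) whose operand is known to be 0 or 1 also acts as a
  // truncate of X to i1; the final check below requires all bits of Op other
  // than bit 0 to be known zero.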
  if (N.getOpcode() != ISD::SETCC ||
      N.getValueType().getScalarType() != MVT::i1 ||
      cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  if (isNullOrNullSplat(Op0))
    Op = Op1;
  else if (isNullOrNullSplat(Op1))
    Op = Op0;
  else
    return false;

  Known = DAG.computeKnownBits(Op);

  return (Known.Zero | 1).isAllOnesValue();
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
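  // For example (illustrative): zext (trunc i64 X to i8) to i32 can become
  // trunc i64 X to i32 when bits 8..31 of X are known zero; TruncatedBits
  // below is exactly that range, bits [8, min(64, 32)).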
  SDValue Op;
  KnownBits Known;
  if (isTruncateOf(DAG, N0, Op, Known)) {
    APInt TruncatedBits =
        (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
            APInt(Op.getScalarValueSizeInBits(), 0) :
            APInt::getBitsSet(Op.getScalarValueSizeInBits(),
                              N0.getScalarValueSizeInBits(),
                              std::min(Op.getScalarValueSizeInBits(),
                                       VT.getScalarSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }
  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);  // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }
  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N, 0);  // Return N so it doesn't get rechecked!
      }
    }
  }
  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
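      // For example (illustrative): for x : i8 zero-extended to i16, the top
      // 8 bits are known zero, so shl by up to 8 cannot lose bits, but shl
      // by 9 could move bit 7 of x past bit 15, so the fold is skipped.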
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
                               InnerZExt.getOperand(0).getValueSizeInBits();
      if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (smaller load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);  // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction, so we only perform this transformation
  // on scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode *, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        recursivelyDeleteUnusedNodes(LN0);
      } else {
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0);  // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      recursivelyDeleteUnusedNodes(LN0);
      return SDValue(N, 0);  // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
SDValue DAGCombiner::visitAssertExt(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
  if (N0.getOpcode() == Opcode &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make a single stronger
    // assert by asserting on the smaller asserted type applied to the larger
    // source type. This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits to a type larger than the "
           "truncated destination does not provide information");

    SDLoc DL(N);
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X, just move the AssertZext in front of the truncate and drop the
  // AssertSext.
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::AssertSext &&
      Opcode == ISD::AssertZext) {
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits to a type larger than the "
           "truncated destination does not provide information");

    if (AssertVT.bitsLT(BigA_AssertVT)) {
      SDLoc DL(N);
      SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                      BigA.getOperand(0), N1);
      return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
    }
  }

  return SDValue();
}
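
// Worked example of the assert sandwich fold above (a sketch with
// illustrative types): for
//   (AssertZext (i16 trunc (AssertZext i32:X, i8)), i1)
// the smaller asserted type is i1, so the pair collapses to
//   (i16 trunc (AssertZext i32:X, i1))
// and the outer assert disappears.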
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// in the narrower type, transform it to a narrower load from address +
/// N / (number of bits in the new type). Also narrow the load if the result
/// is masked with an AND to effectively produce a smaller type. If the result
/// is to be extended, also fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special case: SRL is basically zero-extending a narrower value,
    // or it may be shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else
      return SDValue();

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of the size of ExtVT?
      if ((ShAmt & (EVTBits - 1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of the size of ExtVT?
        if ((N0.getValueSizeInBits() & (EVTBits - 1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      auto *LN0 = cast<LoadSDNode>(N0);

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes. If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt &ShiftMask =
            cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };
  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian())
    ShAmt = AdjustBigEndianShift(ShAmt);

  EVT PtrType = N0.getOperand(1).getValueType();
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // Recalculate the shift amount after it has been altered to calculate
    // the offset.
    if (DAG.getDataLayout().isBigEndian())
      ShAmt = AdjustBigEndianShift(ShAmt);

    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position
    // in the register.
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
  return Result;
}
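
// Worked example for ReduceLoadWidth (a sketch assuming a little-endian
// target where narrow i16 loads are legal):
//   (i16 truncate (i32 srl (i32 load p), 16))
// has ShAmt == 16, a multiple of the 16-bit result width, so it narrows to
//   (i16 load p+2)
// with PtrOff = ShAmt / 8 = 2 and the pointer info and alignment adjusted to
// match. On a big-endian target, AdjustBigEndianShift remaps the shift so
// the load reads the mirrored bytes (offset 0 in this example).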
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits - EVTBits + 1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    unsigned N00Bits = N00.getScalarValueSizeInBits();
    if ((N00Bits <= EVTBits ||
         (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
                         N0.getOperand(0));
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
        N0.hasOneUse()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
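
// Worked example for the SRL fold above (a sketch):
//   (sext_in_reg (i32 srl X, 24), i8) -> (i32 sra X, 24)
// always holds, because the low 8 bits after the shift are exactly the top
// byte of X. By contrast,
//   (sext_in_reg (i32 srl X, 23), i8) -> (i32 sra X, 23)
// additionally needs ComputeNumSignBits(X) >= 2, since bit 30 of X must
// already equal the sign bit for the SRA to reproduce the sign extension.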
SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (SrcVT == VT)
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits();

    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      int Index = isLE ? (Elt * SizeRatio) : (Elt * SizeRatio + (SizeRatio - 1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
    unsigned Size = VT.getScalarSizeInBits();
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Attempt to pre-truncate BUILD_VECTOR sources.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
    SDLoc DL(N);
    EVT SVT = VT.getScalarType();
    SmallVector<SDValue, 8> TruncOps;
    for (const SDValue &Op : N0->op_values()) {
      SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
      TruncOps.push_back(TruncOp);
    }
    return DAG.getBuildVector(VT, DL, TruncOps);
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example, fold
  //   (2xi32 trunc (bitcast ((4xi32) buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts = BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }
  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (LN0->isSimple() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one member is non-undef.
      if (NumDefs > 1)
        break;

      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do this for ADDCARRY before operation legalization.
      ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
       TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and
  // extract do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Narrow a suitable binary operation with a non-opaque constant operand by
  // moving it ahead of the truncate. This is limited to pre-legalization
  // because targets may prefer a wider type during later combines and invert
  // this transform.
  switch (N0.getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    if (!LegalOperations && N0.hasOneUse() &&
        (isConstantOrConstantVector(N0.getOperand(0), true) ||
         isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious to not create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
      if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
        SDLoc DL(N);
        SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
        SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
      }
    }
  }

  return SDValue();
}
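
// Worked example for the binop narrowing at the end of visitTRUNCATE (a
// sketch, pre-legalization):
//   (i16 truncate (i32 add X, 42))
// becomes
//   (i16 add (i16 truncate X), 42)
// since truncation distributes over add in modular arithmetic; the narrow
// constant and operation can then participate in further 16-bit combines.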
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR always has the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load,
  // we need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}
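
// Worked example for CombineConsecutiveLoads (a sketch assuming a
// little-endian target where a sufficiently aligned i64 load is legal):
//   (i64 build_pair (i32 load p), (i32 load p+4))
// becomes a single
//   (i64 load p)
// because elt 0 holds the least significant half. On big-endian targets the
// two loads are swapped before the consecutiveness check, so the combined
// load is taken from the high part's address instead.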
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
  // and Lo parts; on big-endian machines it doesn't.
  return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}

static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // If this is not a bitcast to an FP type or if the target doesn't have
  // IEEE754-compliant FP logic, we're done.
  EVT VT = N->getValueType(0);
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    return SDValue();

  // TODO: Handle cases where the integer constant is a different scalar
  // bitwidth to the FP.
  SDValue N0 = N->getOperand(0);
  EVT SourceVT = N0.getValueType();
  if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
    return SDValue();

  unsigned FPOpcode;
  APInt SignMask;
  switch (N0.getOpcode()) {
  case ISD::AND:
    FPOpcode = ISD::FABS;
    SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::XOR:
    FPOpcode = ISD::FNEG;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  case ISD::OR:
    FPOpcode = ISD::FABS;
    SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
    break;
  default:
    return SDValue();
  }

  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
  // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
  //   fneg (fabs X)
  SDValue LogicOp0 = N0.getOperand(0);
  ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
      LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0.getOperand(0).getValueType() == VT) {
    SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
    NumFPLogicOpsConv++;
    if (N0.getOpcode() == ISD::OR)
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
    return FPOp;
  }

  return SDValue();
}
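
// Worked example for foldBitcastedFPLogic (a sketch, assuming the target
// reports hasBitPreservingFPLogic(f32)):
//   (f32 bitcast (i32 and (i32 bitcast f32:X), 0x7fffffff)) -> (fabs X)
//   (f32 bitcast (i32 xor (i32 bitcast f32:X), 0x80000000)) -> (fneg X)
//   (f32 bitcast (i32 or  (i32 bitcast f32:X), 0x80000000)) -> (fneg (fabs X))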
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize types, unless both types are integer and the
  // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
  // First check to see if this is all constant.
  // TODO: Support FP bitcasts after legalize types.
  if (VT.isVector() &&
      (!LegalTypes ||
       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
        TLI.isTypeLegal(VT.getVectorElementType()))) &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      cast<BuildVectorSDNode>(N0)->isConstant())
    return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                             VT.getVectorElementType());

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT))) {
      SDValue C = DAG.getBitcast(VT, N0);
      if (C.getNode() != N)
        return C;
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      // If the load is volatile, we only want to change the load type if the
      // resulting load is legal. Otherwise we might increase the number of
      // memory accesses. We don't care if the original type was legal or not
      // as we assume software couldn't rely on the number of accesses of an
      // illegal type.
      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
       TLI.isOperationLegal(ISD::LOAD, VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);

    if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
                                    *LN0->getMemOperand())) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), LN0->getAlignment(),
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }
  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth - VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }
  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    SDValue LegalShuffle =
        TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
    if (LegalShuffle)
      return LegalShuffle;
  }

  return SDValue();
}
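
// Worked example for the shuffle fold at the end of visitBITCAST (a sketch):
//   (v4i32 bitcast (v2i64 vector_shuffle (v2i64 bitcast v4i32:a),
//                                        (v2i64 bitcast v4i32:b), <1, 0>))
// has MaskScale = 4 / 2 = 2, so the v2i64 mask <1, 0> is widened to the
// v4i32 mask <2, 3, 0, 1> and both inner bitcasts are peeked through:
//   (v4i32 vector_shuffle a, b, <2, 3, 0, 1>)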
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element. This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated. Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements. To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector whose elements are the
    // same size.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector. If the output is an FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handle the growing case first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize / SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i + (isLE ? (NumInputsPerOutput - j - 1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize / DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput * BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end() - NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
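
// Worked example for the shrinking case above (a sketch assuming a
// little-endian layout): bitcasting
//   (v2i32 build_vector 0x11223344, undef)
// to v4i16 splits each 32-bit element into NumOutputsPerInput = 2 pieces,
// low piece first:
//   (v4i16 build_vector 0x3344, 0x1122, undef, undef)
// On a big-endian target the two pieces of each element are reversed.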
static bool isContractable(SDNode *N) {
  SDNodeFlags F = N->getFlags();
  return F.hasAllowContract() || F.hasAllowReassociation();
}
  9986. /// Try to perform FMA combining on a given FADD node.
  9987. SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  9988. SDValue N0 = N->getOperand(0);
  9989. SDValue N1 = N->getOperand(1);
  9990. EVT VT = N->getValueType(0);
  9991. SDLoc SL(N);
  9992. const TargetOptions &Options = DAG.getTarget().Options;
  9993. // Floating-point multiply-add with intermediate rounding.
  9994. bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
  9995. // Floating-point multiply-add without intermediate rounding.
  9996. bool HasFMA =
  9997. TLI.isFMAFasterThanFMulAndFAdd(VT) &&
  9998. (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
  9999. // No valid opcode, do not combine.
  10000. if (!HasFMAD && !HasFMA)
  10001. return SDValue();
  10002. SDNodeFlags Flags = N->getFlags();
  10003. bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  10004. bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
  10005. CanFuse || HasFMAD);
  10006. // If the addition is not contractable, do not combine.
  10007. if (!AllowFusionGlobally && !isContractable(N))
  10008. return SDValue();
  10009. const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  10010. if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
  10011. return SDValue();
  10012. // Always prefer FMAD to FMA for precision.
  10013. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  10014. bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  10015. // Is the node an FMUL and contractable either due to global flags or
  10016. // SDNodeFlags.
  10017. auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
  10018. if (N.getOpcode() != ISD::FMUL)
  10019. return false;
  10020. return AllowFusionGlobally || isContractable(N.getNode());
  10021. };
  10022. // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  10023. // prefer to fold the multiply with fewer uses.
  10024. if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
  10025. if (N0.getNode()->use_size() > N1.getNode()->use_size())
  10026. std::swap(N0, N1);
  10027. }
  10028. // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  10029. if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
  10030. return DAG.getNode(PreferredFusedOpcode, SL, VT,
  10031. N0.getOperand(0), N0.getOperand(1), N1, Flags);
  10032. }
  10033. // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  10034. // Note: Commutes FADD operands.
  10035. if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
  10036. return DAG.getNode(PreferredFusedOpcode, SL, VT,
  10037. N1.getOperand(0), N1.getOperand(1), N0, Flags);
  10038. }
  10039. // Look through FP_EXTEND nodes to do more combining.
  10040. // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  10041. if (N0.getOpcode() == ISD::FP_EXTEND) {
  10042. SDValue N00 = N0.getOperand(0);
  10043. if (isContractableFMUL(N00) &&
  10044. TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
  10045. return DAG.getNode(PreferredFusedOpcode, SL, VT,
  10046. DAG.getNode(ISD::FP_EXTEND, SL, VT,
  10047. N00.getOperand(0)),
  10048. DAG.getNode(ISD::FP_EXTEND, SL, VT,
  10049. N00.getOperand(1)), N1, Flags);
  10050. }
  10051. }
  10052. // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  10053. // Note: Commutes FADD operands.
  10054. if (N1.getOpcode() == ISD::FP_EXTEND) {
  10055. SDValue N10 = N1.getOperand(0);
  10056. if (isContractableFMUL(N10) &&
  10057. TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
  10058. return DAG.getNode(PreferredFusedOpcode, SL, VT,
  10059. DAG.getNode(ISD::FP_EXTEND, SL, VT,
  10060. N10.getOperand(0)),
  10061. DAG.getNode(ISD::FP_EXTEND, SL, VT,
  10062. N10.getOperand(1)), N0, Flags);
  10063. }
  10064. }
  10065. // More folding opportunities when target permits.
  10066. if (Aggressive) {
  10067. // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
  10068. if (CanFuse &&
  10069. N0.getOpcode() == PreferredFusedOpcode &&
  10070. N0.getOperand(2).getOpcode() == ISD::FMUL &&
  10071. N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
  10072. return DAG.getNode(PreferredFusedOpcode, SL, VT,
  10073. N0.getOperand(0), N0.getOperand(1),
  10074. DAG.getNode(PreferredFusedOpcode, SL, VT,
  10075. N0.getOperand(2).getOperand(0),
  10076. N0.getOperand(2).getOperand(1),
  10077. N1, Flags), Flags);
  10078. }
  10079. // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
  10080. if (CanFuse &&
  10081. N1->getOpcode() == PreferredFusedOpcode &&
  10082. N1.getOperand(2).getOpcode() == ISD::FMUL &&
  10083. N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
  10084. return DAG.getNode(PreferredFusedOpcode, SL, VT,
  10085. N1.getOperand(0), N1.getOperand(1),
  10086. DAG.getNode(PreferredFusedOpcode, SL, VT,
  10087. N1.getOperand(2).getOperand(0),
  10088. N1.getOperand(2).getOperand(1),
  10089. N0, Flags), Flags);
  10090. }
  10091. // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
  10092. // -> (fma x, y, (fma (fpext u), (fpext v), z))
  10093. auto FoldFAddFMAFPExtFMul = [&] (
  10094. SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
  10095. SDNodeFlags Flags) {
  10096. return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
  10097. DAG.getNode(PreferredFusedOpcode, SL, VT,
  10098. DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
  10099. DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
  10100. Z, Flags), Flags);
  10101. };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&] (
      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
      SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  return SDValue();
}
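// Illustrative IR-level example for the FADD combine above (not from the
// source): with contraction allowed, the pair
//   t = fmul f32 a, b
//   r = fadd f32 t, c
// is replaced by the single node  r = fma f32 a, b, c  (or FMAD when that
// is the preferred fused opcode).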
/// Try to perform FMA combining on a given FSUB node.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);

  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
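  // (FMAD keeps the intermediate rounding step, so its result matches the
  // unfused fmul+fadd sequence; FMA rounds only once.)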
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0, Flags);
  }

  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                 N10.getOperand(0))),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)),
                         N0, Flags);
    }
  }

  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1), Flags), Flags);
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2))) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
                                     N21, N0, Flags), Flags);
    }

    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             N0.getOperand(0), N0.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(1)),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableFMUL(N120) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1200)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1201),
                                       N0, Flags), Flags);
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableFMUL(N102) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N100)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1020)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1021),
                                       N0, Flags), Flags);
      }
    }
  }

  return SDValue();
}
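// Illustrative IR-level example for the FSUB combine above (not from the
// source): under the same contraction rules,
//   fsub (fmul a, b), c  becomes  fma a, b, (fneg c),  and
//   fsub c, (fmul a, b)  becomes  fma (fneg a), b, c.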
/// Try to perform FMA combining on a given FMUL node based on the
/// distributive law x * (y + 1) = x * y + x and variants thereof (commuted
/// versions, subtraction instead of addition).
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);
  const SDNodeFlags Flags = N->getFlags();

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();
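  // Example of the hazard: (0.0 + 1.0) * inf == inf, but the fused form
  // fma(0.0, inf, inf) evaluates 0.0 * inf + inf == NaN + inf == NaN.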

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
        if (C->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
    }
    return SDValue();
  };

  if (SDValue FMA = FuseFADD(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0, Flags))
    return FMA;

  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
        if (C0->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             Y, Flags);
        if (C0->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C1->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
        if (C1->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
      }
    }
    return SDValue();
  };

  if (SDValue FMA = FuseFSUB(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0, Flags))
    return FMA;
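  // Note: each helper above is tried with both operand orders, since fmul is
  // commutative and the fadd/fsub pattern may sit on either side.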

  return SDValue();
}

SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros())
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations,
                                            ForCodeSize), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations,
                                            ForCodeSize), Flags);

  auto isFMulNegTwo = [](SDValue FMul) {
    if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
      return false;
    auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
    return C && C->isExactlyValue(-2.0);
  };
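  // The two folds below rewrite A + (B * -2.0) as A - (B + B), trading the
  // multiply by -2.0 for an add and a subtract.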

  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N0)) {
    SDValue B = N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
  }
  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
  if (isFMulNegTwo(N1)) {
    SDValue B = N1.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If nnan is enabled, fold lots of things.
  if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If 'unsafe math' or reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  //       considered and which do not require both nsz and reassoc
  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
                                 Flags);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the
    // number of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP,
                             Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP,
                             Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}

SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros()) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub -0.0, N1) -> -N1
  // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
  // FSUB does not specify the sign bit of a NaN. Also note that for
  // the same reason, the inverse transform is not safe, unless fast math
  // flags are in play.
  if (N0CFP && N0CFP->isZero()) {
    if (N0CFP->isNegative() ||
        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
        return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
    if (N0 == N1->getOperand(0))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
    // X - (Y + X) -> -Y
    if (N0 == N1->getOperand(1))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations,
                                            ForCodeSize), Flags);

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}

/// Return true if both inputs are at least as cheap in negated form and at
/// least one input is strictly cheaper in negated form.
bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
  const TargetOptions &Options = DAG.getTarget().Options;

  if (char LHSNeg = isNegatibleForFree(X, LegalOperations, TLI, &Options,
                                       ForCodeSize))
    if (char RHSNeg = isNegatibleForFree(Y, LegalOperations, TLI, &Options,
                                         ForCodeSize))
      // Both negated operands are at least as cheap as their counterparts.
      // Check to see if at least one is cheaper negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return true;

  return false;
}
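// (In isCheaperToUseNegatedFPOps above: per the checks it performs, a nonzero
// isNegatibleForFree result means the negated form costs no more than the
// original, and a result of 2 means it is strictly cheaper.)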

SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
      !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;
  }

  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (isConstantFPBuildVectorOrConstantFP(N1) &&
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      if (isConstantFPBuildVectorOrConstantFP(N01) &&
          !isConstantFPBuildVectorOrConstantFP(N00)) {
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts,
                         Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // -N0 * -N1 --> N0 * N1
  if (isCheaperToUseNegatedFPOps(N0, N1)) {
    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                             DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // (-N0 * -N1) + N2 --> (N0 * N1) + N2
  if (isCheaperToUseNegatedFPOps(N0, N1)) {
    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
    return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
  }

  if (UnsafeFPMath) {
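    // (Folding away a zero multiplicand is only valid under unsafe math: if
    // the other multiplicand were NaN or infinity, the true result of the
    // multiply-add would be NaN, not N2.)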
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }

  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
      !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x, -K, y
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
                                              ForCodeSize)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}

// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // TODO: Limit this transform based on optsize/minsize - it always creates at
  //       least 1 extra instruction. But the perf win may be substantial enough
  //       that only minsize should restrict this.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal/fneg-reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
  if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();

  // For splat vectors, scale the number of uses by the splat factor. If we can
  // convert the division into a scalar op, that will likely be much faster.
  unsigned NumElts = 1;
  EVT VT = N->getValueType(0);
  if (VT.isVector() && DAG.isSplatValue(N1))
    NumElts = VT.getVectorNumElements();

  if (!MinUses || (N1->use_size() * NumElts) < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if ((Users.size() * NumElts) < MinUses)
    return SDValue();

  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0); // N was replaced.
}
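// Worked example for combineRepeatedFPDivisors above (assuming the target's
// MinUses threshold is met):
//   a / d; b / d; c / d  -->  r = 1.0 / d; a * r; b * r; c * r
// i.e. three divides collapse into one divide plus three multiplies.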

SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = combineRepeatedFPDivisors(N))
    return V;

  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
      return RV;
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (isCheaperToUseNegatedFPOps(N0, N1))
    return DAG.getNode(
        ISD::FDIV, SDLoc(N), VT,
        GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
        GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);

  return SDValue();
}

SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  return SDValue();
}

SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  SDNodeFlags Flags = N->getFlags();
  if (!DAG.getTarget().Options.UnsafeFPMath &&
      !Flags.hasApproximateFuncs())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  if (TLI.isFsqrtCheap(N0, DAG))
    return SDValue();

  // FSQRT nodes have flags that propagate to the created nodes.
  return buildSqrtEstimate(N0, Flags);
}

/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
  SDValue N1 = N->getOperand(1);
  if ((N1.getOpcode() == ISD::FP_EXTEND ||
       N1.getOpcode() == ISD::FP_ROUND)) {
    // Do not optimize out type conversion of f128 type yet.
    // For some targets like x86_64, configuration is changed to keep one f128
    // value in one SSE register, but instruction selection cannot handle
    // FCOPYSIGN on SSE registers yet.
    EVT N1VT = N1->getValueType(0);
    EVT N1Op0VT = N1->getOperand(0).getValueType();
    return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
  }
  return false;
}

SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
    const APFloat &V = N1C->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));

  return SDValue();
}

SDValue DAGCombiner::visitFPOW(SDNode *N) {
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) = nan;  cbrt(-val) = -cbrt(val).
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do
    // not turn a pow that has lowering support into a cbrt() libcall.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
  }

  // Try to convert x ** (1/4) and x ** (3/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  //       power-of-2 fractional exponents.
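  // (These decompositions follow from 0.25 == 0.5 * 0.5 and
  // 0.75 == 0.5 + 0.25, so X**0.25 == sqrt(sqrt(X)) and
  // X**0.75 == sqrt(X) * sqrt(sqrt(X)).)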
  bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
  bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
  if (ExponentIs025 || ExponentIs075) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
    // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
    // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();

    // We only need no signed zeros for the 0.25 case.
    if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (DAG.getMachineFunction().getFunction().hasOptSize())
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
    SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
    if (ExponentIs025)
      return SqrtSqrt;
    // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
    return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
  }

  return SDValue();
}

static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
                               const TargetLowering &TLI) {
  // This optimization is guarded by a function attribute because it may
  // produce unexpected results. I.e., programs may be relying on the
  // platform-specific undefined behavior when the float-to-int conversion
  // overflows.
  const Function &F = DAG.getMachineFunction().getFunction();
  Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
  if (StrictOverflow.getValueAsString().equals("false"))
    return SDValue();

  // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
  // replacing casts with a libcall. We also must be allowed to ignore -0.0
  // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
  // conversions would return +0.0.
  // FIXME: We should be able to use node-level FMF here.
  // TODO: If strict math, should we use FABS (+ range check for signed cast)?
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
      !DAG.getTarget().Options.NoSignedZerosFPMath)
    return SDValue();

  // fptosi/fptoui round towards zero, so converting from FP to integer and
  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
  SDValue N0 = N->getOperand(0);
  if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
      N0.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));

  if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
      N0.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));

  return SDValue();
}
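// Illustrative example for foldFPToIntToFP above (not from the source): for
// f32 on a target with legal FTRUNC, when signed zeros may be ignored, the
// round trip
//   %i = fptosi float %x to i32
//   %r = sitofp i32 %i to float
// collapses to a single  ftrunc float %x.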
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, SDLoc(N), VT);

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
      hasOperation(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // [us]itofp(undef) = 0, because the result value is bounded.
  if (N0.isUndef())
    return DAG.getConstantFP(0.0, SDLoc(N), VT);

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
      hasOperation(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
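  // For example, i16 -> f32 -> i32 with both conversions signed: InputSize is
  // 15, OutputSize is 31, and min(15, 31) = 15 fits in f32's 24-bit
  // significand, so the pair folds to a sign_extend.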
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);

    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint undef) -> undef
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (fp_to_sint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}

SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint undef) -> undef
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (fp_to_uint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64. Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
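    // For example, rounding f64 -> f32 -> f16 can produce a different result
    // than a single f64 -> f16 rounding when the intermediate f32 rounding
    // creates a value exactly halfway between two f16 values.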
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND &&
      N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  // fold ftrunc (known rounded int x) -> x
  // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
  // likely to be generated to extract an integer from a rounded floating value.
  switch (N0.getOpcode()) {
  default: break;
  case ISD::FRINT:
  case ISD::FTRUNC:
  case ISD::FNEARBYINT:
  case ISD::FFLOOR:
  case ISD::FCEIL:
    return N0;
  }

  return SDValue();
}

SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}
// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options, ForCodeSize))
    return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
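      // For example, an f32 negated through an i32 bitcast uses the mask
      // 0x80000000; the XOR below flips only the sign bit.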
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}
static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
                            APFloat (*Op)(const APFloat &, const APFloat &)) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
  }

  // Canonicalize to constant on RHS.
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
      !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  return visitFMinMax(DAG, N, minnum);
}

SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  return visitFMinMax(DAG, N, maxnum);
}

SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, minimum);
}

SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, maximum);
}
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
  if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
      }
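      // For example, an f32 fabs through an i32 bitcast uses the mask
      // 0x7fffffff; the AND below clears only the sign bit.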
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(N->getValueType(0), Int);
    }
  }

  return SDValue();
}
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if (N1.hasOneUse()) {
    if (SDValue NewN1 = rebuildSetCC(N1))
      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
  }

  return SDValue();
}
SDValue DAGCombiner::rebuildSetCC(SDValue N) {
  if (N.getOpcode() == ISD::SRL ||
      (N.getOpcode() == ISD::TRUNCATE &&
       (N.getOperand(0).hasOneUse() &&
        N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
    if (N.getOpcode() == ISD::TRUNCATE)
      N = N.getOperand(0);

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
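    // For example, (srl (and %a, 8), 3) used as a branch condition becomes
    // (setcc ne (and %a, 8), 0), since 8 has one bit set and 3 == log2(8).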
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
          SDLoc DL(N);
          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                              ISD::SETNE);
        }
      }
    }
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
    HandleSDNode XORHandle(N);
    while (N.getOpcode() == ISD::XOR) {
      SDValue Tmp = visitXOR(N.getNode());
      // No simplification done.
      if (!Tmp.getNode())
        break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
      if (Tmp.getNode() == N.getNode())
        N = XORHandle.getValue();
      else // Node simplified. Try simplifying again.
        N = Tmp;
    }

    if (N.getOpcode() != ISD::XOR)
      return N;

    SDNode *TheXor = N.getNode();

    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);

      // Replace the uses of XOR with SETCC
      return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
                          Equal ? ISD::SETEQ : ISD::SETNE);
    }
  }

  return SDValue();
}
// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}
/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
                                    SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  EVT VT;
  unsigned AS;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  } else
    return false;

  TargetLowering::AddrMode AM;
  if (N->getOpcode() == ISD::ADD) {
    AM.HasBaseReg = true;
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else if (N->getOpcode() == ISD::SUB) {
    AM.HasBaseReg = true;
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = -Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else
    return false;

  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
                                   VT.getTypeForEVT(*DAG.getContext()), AS);
}
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in, and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1. Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();

    // Would require a copy.
    if (Val == BasePtr)
      return false;

    // Would create a cycle.
    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
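    // For example, if the other use is (sub ptr0, 4) and the new pre-inc node
    // computes t1 = ptr0 + 8, then X0 = -1 and Y0 = X1 = Y1 = 1, and the
    // rewrite below produces (add t1, -12), which equals ptr0 - 4.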
    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the add/sub
/// into the new indexed load/store, and all uses of the add/sub are redirected
/// to the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  if (Ptr.getNode()->hasOneUse())
    return false;

  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      SmallPtrSet<const SDNode *, 32> Visited;
      SmallVector<const SDNode *, 8> Worklist;
      // Ptr is predecessor to both N and Op.
      Visited.insert(Ptr.getNode());
      Worklist.push_back(N);
      Worklist.push_back(Op);
      if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
          !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
                          ConstInc->getValueType(0));
  }

  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

static inline int numVectorEltsOrZero(EVT T) {
  return T.isVector() ? T.getVectorNumElements() : 0;
}
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
  Val = ST->getValue();
  EVT STType = Val.getValueType();
  EVT STMemType = ST->getMemoryVT();
  if (STType == STMemType)
    return true;
  if (isTypeLegal(STMemType))
    return false; // fail.
  if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
      TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
    Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
    return true;
  }
  if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
      STType.isInteger() && STMemType.isInteger()) {
    Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
    return true;
  }
  if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
    Val = DAG.getBitcast(STMemType, Val);
    return true;
  }
  return false; // fail.
}

bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
  EVT LDMemType = LD->getMemoryVT();
  EVT LDType = LD->getValueType(0);
  assert(Val.getValueType() == LDMemType &&
         "Attempting to extend value of non-matching type");
  if (LDType == LDMemType)
    return true;
  if (LDMemType.isInteger() && LDType.isInteger()) {
    switch (LD->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      Val = DAG.getBitcast(LDType, Val);
      return true;
    case ISD::EXTLOAD:
      Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    case ISD::SEXTLOAD:
      Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    case ISD::ZEXTLOAD:
      Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    }
  }
  return false;
}
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  if (OptLevel == CodeGenOpt::None || !LD->isSimple())
    return SDValue();
  SDValue Chain = LD->getOperand(0);
  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
  // TODO: Relax this restriction for unordered atomics (see D66309)
  if (!ST || !ST->isSimple())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
  int64_t Offset;
  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
    return SDValue();

  // Normalize for endianness. After this, Offset=0 denotes that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
  if (DAG.getDataLayout().isBigEndian())
    Offset = (STMemType.getStoreSizeInBits() -
              LDMemType.getStoreSizeInBits()) / 8 - Offset;
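  // For example, for a big-endian i32 store feeding an i8 load at the same
  // address, the pointer-derived offset is 0, but the loaded byte is the
  // (32 - 8) / 8 - 0 = 3:rd least significant byte of the stored value.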
  // Check that the stored value covers all bits that are loaded.
  bool STCoversLD =
      (Offset >= 0) &&
      (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());

  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
                    LD->getAddressingMode() == ISD::POST_DEC);
      unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
      SDValue Idx = DAG.getNode(Opc, SDLoc(LD),
                                LD->getOperand(1).getValueType(),
                                LD->getOperand(1), LD->getOperand(2));
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
                                               STMemType.getSizeInBits()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extensions.
  SDValue Val;
  // Truncate Value To Stored Memory Size.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      continue;
    if (!isTypeLegal(LDMemType))
      continue;
    if (STMemType != LDMemType) {
      // TODO: Support vectors? This requires extract_subvector/bitcast.
      if (!STMemType.isVector() && !LDMemType.isVector() &&
          STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        continue;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      continue;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
    deleteAndRecombine(Val.getNode());
  return SDValue();
}
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  // TODO: Allow this for unordered atomics (see D66309)
  if (LD->isSimple()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(LD, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
namespace {

/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize = false;

    /// Various costs.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains, like removing a shift or a
    /// truncate. This method allows growing the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin comes.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
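    // For example, an i32 load shifted right by 16 and truncated to i8 uses
    // bits 0x00FF0000 of the original value.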
  12553. assert(Origin && "No original load to compare against.");
  12554. unsigned BitWidth = Origin->getValueSizeInBits(0);
  12555. assert(Inst && "This slice is not bound to an instruction");
  12556. assert(Inst->getValueSizeInBits(0) <= BitWidth &&
  12557. "Extracted slice is bigger than the whole type!");
  12558. APInt UsedBits(Inst->getValueSizeInBits(0), 0);
  12559. UsedBits.setAllBits();
  12560. UsedBits = UsedBits.zext(BitWidth);
  12561. UsedBits <<= Shift;
  12562. return UsedBits;
  12563. }
  12564. /// Get the size of the slice to be loaded in bytes.
  12565. unsigned getLoadedSize() const {
  12566. unsigned SliceSize = getUsedBits().countPopulation();
  12567. assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
  12568. return SliceSize / 8;
  12569. }
  12570. /// Get the type that will be loaded for this slice.
  12571. /// Note: This may not be the final type for the slice.
  12572. EVT getLoadedType() const {
  12573. assert(DAG && "Missing context");
  12574. LLVMContext &Ctxt = *DAG->getContext();
  12575. return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  12576. }
  12577. /// Get the alignment of the load used for this slice.
  12578. unsigned getAlignment() const {
  12579. unsigned Alignment = Origin->getAlignment();
  12580. uint64_t Offset = getOffsetFromBase();
  12581. if (Offset != 0)
  12582. Alignment = MinAlign(Alignment, Alignment + Offset);
  12583. return Alignment;
  12584. }
  /// Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed loads only; we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }
  /// Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
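  /// For instance (illustrative): with a 4-byte \p Origin and a 2-byte slice
  /// whose Shift is 16, the offset is 2 on a little-endian target and
  /// 4 - 2 - 2 == 0 on a big-endian one.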
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }
  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
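  /// Schematically, the emitted sequence is (an illustrative sketch):
  ///   NewPtr = add BaseAddr, getOffsetFromBase()  ; only if the offset is
  ///                                               ; non-zero
  ///   Slice  = load SliceType, NewPtr
  ///   Result = zero_extend Slice                  ; only if a zext is needed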
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;

    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());

    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);

    return LastInst;
  }
  /// Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC =
        TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
                           Use->getOperand(0)->isDivergent());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));
    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};

} // end anonymous namespace
/// Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
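/// For instance (illustrative): 0x00FF00 is dense (a single run of eight
/// ones), while 0xFF00FF is not (two runs separated by zeros).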
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}
/// Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are in the slices in \p LoadedSlices.
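/// For instance (an illustrative sketch): on a target that reports paired
/// loads via TLI.hasPairedLoad, two adjacent slices of the same type can be
/// folded into a single paired load, so one load is subtracted from
/// \p GlobalLSCost.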
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });

  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}
/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model is mature enough, we can relax
/// constraints (1) and (2).
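/// For instance (an illustrative account, not from a real run): slicing one
/// i32 load into two i16 slices is charged two loads, while the original
/// configuration is charged its single load plus, via addSliceGain, the
/// truncates and the shift that slicing removes; if the target pairs the two
/// slice loads into one, slicing wins the comparison below.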
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  if (StressLoadSlicing)
    return NumberOfSlices > 1;

  // Check (1).
  if (NumberOfSlices != 2)
    return false;

  // Check (2).
  if (!areUsedBitsDense(UsedBits))
    return false;

  // Check (3).
  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
  // The original code has one big load.
  OrigCost.Loads = 1;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
    const LoadedSlice &LS = LoadedSlices[CurrSlice];
    // Accumulate the cost of all the slices.
    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
    GlobalSlicingCost += SliceCost;

    // Account as cost in the original configuration the gain obtained
    // with the current slices.
    OrigCost.addSliceGain(LS);
  }

  // If the target supports paired loads, adjust the cost accordingly.
  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
  return OrigCost > GlobalSlicingCost;
}
/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it into the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
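/// For instance (an illustrative sketch on a little-endian target, assuming
/// both slices are legal and profitable):
///   x = load i32 p
///   a = trunc x to i16
///   b = trunc (lshr x, 16) to i16
/// becomes
///   a = load i16 p
///   b = load i16 (p + 2)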
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a TRUNCATE, i.e., we encountered trunc or
    // trunc(lshr).
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bit multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}
/// Check to see if V is (and (load ptr), imm), where the load has specific
/// bytes cleared out. If so, return the byte size being masked out and the
/// shift amount.
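/// For instance (an illustrative, worked case): for an i32 load and
/// imm == 0xFFFF0000, the inverted mask is 0x0000FFFF, so the low two bytes
/// are being masked out and the result is {MaskedBytes = 2, ByteShift = 0}.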
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask. Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits. If so, we have 0*1+0*.
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64 - V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits() - NotMaskLZ - NotMaskTZ) / 8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result;  // All one mask, or 5-byte mask.
  }

  // Verify that the first masked bit starts at a multiple of the mask width
  // so that the access is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ / 8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load is the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so there are no indirect dependencies.
    if (!LD->isOperandOf(Chain.getNode()))
      return Result;
  } else
    return Result;  // Fail.

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ / 8;
  return Result;
}
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
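/// For instance (an illustrative sketch on a little-endian target): with
/// MaskInfo == {2, 0}, a store of (or (and (load p), 0xFFFF0000), IVal),
/// where IVal only provides the low two bytes, becomes
///   store i16 (trunc IVal), p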
static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this. If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift * 8, (ByteShift + NumBytes) * 8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();

  // Check that it is legal on the target to do this. It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization (and the target doesn't explicitly think this is a bad idea).
  MVT VT = MVT::getIntegerVT(NumBytes * 8);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!DC->isTypeLegal(VT))
    return SDValue();
  if (St->getMemOperand() &&
      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                              *St->getMemOperand()))
    return SDValue();

  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift * 8, DL,
                                       DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign);
}
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
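/// For instance (an illustrative, worked case on a little-endian target,
/// assuming the i8 operations are legal and narrowing is profitable):
///   store i32 (or (load i32 p), 0x00FF0000), p
/// only touches byte 2, so it can be narrowed to
///   store i8 (or (load i8 (p + 2)), 0xFF), (p + 2)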
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  if (!ST->isSimple())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes. If so, we try to replace the
  // load / replace / store sequence with a single (narrower) store, which
  // makes the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                          Value.getOperand(1),
                                                          ST, this))
        return NewST;

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                          Value.getOperand(0),
                                                          ST, this))
        return NewST;
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb that changes does not start at a NewBW-bit boundary, start
    // at the previous boundary.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
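/// For instance (illustrative, assuming the target reports the integer ops
/// as legal and desirable):
///   store f32 (load f32 p), q  -->  store i32 (load i32 p), q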
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Value = ST->getValue();
  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
      Value.hasOneUse()) {
    LoadSDNode *LD = cast<LoadSDNode>(Value);
    EVT VT = LD->getMemoryVT();
    if (!VT.isFloatingPoint() ||
        VT != ST->getMemoryVT() ||
        LD->isNonTemporal() ||
        ST->isNonTemporal() ||
        LD->getPointerInfo().getAddrSpace() != 0 ||
        ST->getPointerInfo().getAddrSpace() != 0)
      return SDValue();

    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
      return SDValue();

    unsigned LDAlign = LD->getAlignment();
    unsigned STAlign = ST->getAlignment();
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
    if (LDAlign < ABIAlign || STAlign < ABIAlign)
      return SDValue();

    SDValue NewLD =
        DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
                    LD->getPointerInfo(), LDAlign);

    SDValue NewST =
        DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
                     ST->getPointerInfo(), STAlign);

    AddToWorklist(NewLD.getNode());
    AddToWorklist(NewST.getNode());
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
    ++LdStFP2Int;
    return NewST;
  }

  return SDValue();
}
// This is a helper function for visitMUL to check the profitability
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
// MulNode is the original multiply, AddNode is (add x, c1),
// and ConstNode is c2.
//
// If the (add x, c1) has multiple uses, we could increase
// the number of adds if we make this transformation.
// It would only be worth doing this if we can remove a
// multiply in the process. Check for that here.
// To illustrate:
//     (A + c1) * c3
//     (A + c2) * c3
// We're checking for cases where we have common "c3 * A" expressions.
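// For instance (illustrative): after folding both expressions,
//     (A + c1) * c3  -->  A * c3 + c1 * c3
//     (A + c2) * c3  -->  A * c3 + c2 * c3
// the "A * c3" multiply is shared, so the extra adds pay for themselves.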
bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
                                              SDValue &AddNode,
                                              SDValue &ConstNode) {
  APInt Val;

  // If the add only has one use, this would be OK to do.
  if (AddNode.getNode()->hasOneUse())
    return true;

  // Walk all the users of the constant with which we're multiplying.
  for (SDNode *Use : ConstNode->uses()) {
    if (Use == MulNode) // This use is the one we're on right now. Skip it.
      continue;

    if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
      SDNode *OtherOp;
      SDNode *MulVar = AddNode.getOperand(0).getNode();

      // OtherOp is what we're multiplying against the constant.
      if (Use->getOperand(0) == ConstNode)
        OtherOp = Use->getOperand(1).getNode();
      else
        OtherOp = Use->getOperand(0).getNode();

      // Check to see if multiply is with the same operand of our "add".
      //
      //     ConstNode = CONST
      //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
      //     ...
      //     AddNode = (A + c1)  <-- MulVar is A.
      //             = AddNode * ConstNode  <-- current visiting instruction.
      //
      // If we make this transformation, we will have a common
      // multiply (ConstNode * A) that we can save.
      if (OtherOp == MulVar)
        return true;

      // Now check to see if a future expansion will give us a common
      // multiply.
      //
      //     ConstNode = CONST
      //     AddNode   = (A + c1)
      //     ...  = AddNode * ConstNode  <-- current visiting instruction.
      //     ...
      //     OtherOp = (A + c2)
      //     Use     = OtherOp * ConstNode  <-- visiting Use.
      //
      // If we make this transformation, we will have a common
      // multiply (CONST * A) after we also do the same transformation
      // to the "Use" instruction.
      if (OtherOp->getOpcode() == ISD::ADD &&
          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
          OtherOp->getOperand(0).getNode() == MulVar)
        return true;
    }
  }

  // Didn't find a case where this would be profitable.
  return false;
}
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         unsigned NumStores) {
  SmallVector<SDValue, 8> Chains;
  SmallPtrSet<const SDNode *, 8> Visited;
  SDLoc StoreDL(StoreNodes[0].MemNode);

  for (unsigned i = 0; i < NumStores; ++i) {
    Visited.insert(StoreNodes[i].MemNode);
  }

  // Don't include nodes that are children or repeated nodes.
  for (unsigned i = 0; i < NumStores; ++i) {
    if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
      Chains.push_back(StoreNodes[i].MemNode->getChain());
  }

  assert(Chains.size() > 0 && "Chain should have generated a chain");
  return DAG.getTokenFactor(StoreDL, Chains);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If the constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Make sure the correctly sized type is used.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // Make sure we use a trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store.
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();
  SDValue Val = peekThroughBitcasts(St->getValue());

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile/indexed/atomic.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (!Ld->isSimple() || Ld->isIndexed())
      return;
  }
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    // The memory operands must not be volatile/indexed/atomic.
    // TODO: May be able to relax for unordered atomics (see D66309)
    if (!Other->isSimple() || Other->isIndexed())
      return false;
    // Don't mix temporal stores with non-temporal stores.
    if (St->isNonTemporal() != Other->isNonTemporal())
      return false;
    SDValue OtherBC = peekThroughBitcasts(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match.
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
        BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile/indexed/atomic.
        // TODO: May be able to relax for unordered atomics (see D66309)
        if (!OtherLd->isSimple() || OtherLd->isIndexed())
          return false;
        // Don't mix temporal loads with non-temporal loads.
        if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(OtherBC.getValueType()))
        return false;
      if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // Check whether the pair of StoreNode and RootNode has already bailed out
  // of the dependence check more times than the limit allows.
  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
                                        SDNode *RootNode) -> bool {
    auto RootCount = StoreRootCountMap.find(StoreNode);
    if (RootCount != StoreRootCountMap.end() &&
        RootCount->second.first == RootNode &&
        RootCount->second.second > StoreMergeDependenceLimit)
      return true;
    return false;
  };

  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load,
  // which is always true for non-volatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1  Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  unsigned NumNodesExplored = 0;
  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
                  !OverLimitInDependenceCheck(OtherST, RootNode))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    for (auto I = RootNode->use_begin(), E = RootNode->use_end();
         I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
              !OverLimitInDependenceCheck(OtherST, RootNode))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}
// We need to check that merging these stores does not cause a loop in
// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
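// For instance (an illustrative sketch): fusing St1 and St2 below into one
// store node would make that node a predecessor of itself, because St2's
// stored value reads a load whose chain passes through St1:
//   St1: store x, p
//   t:   load q                 ; chain successor of St1
//   St2: store (add t, 1), p+4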
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come, in a similar way to
  // TokenFactor simplification.

  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist;

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards size check.

  Worklist.push_back(RootNode);
  while (!Worklist.empty()) {
    auto N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already present in Visited.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (SDValue Op : N->ops())
        Worklist.push_back(Op.getNode());
    }
  }

  // Don't count pruning nodes towards max.
  unsigned int Max = 1024 + Visited.size();
  // Search Ops of store candidates.
  for (unsigned i = 0; i < NumStores; ++i) {
    SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0) -> We have already considered these
    //                     in candidate selection and can be
    //                     safely ignored
    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles are possible (e.g. via indexed store).
    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
    for (unsigned j = 1; j < N->getNumOperands(); ++j)
      Worklist.push_back(N->getOperand(j).getNode());
  }
  // Search through DAG. We can stop early if we find a store node.
  for (unsigned i = 0; i < NumStores; ++i)
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
                                     Max)) {
      // If the search bails out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen the pair many times over a limit,
      // we won't add the StoreNode into the StoreNodes set again.
      if (Visited.size() >= Max) {
        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
        if (RootCount.first == RootNode)
          RootCount.second++;
        else
          RootCount = {RootNode, 1};
      }
      return false;
    }
  return true;
}
  13623. bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
  13624. if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
  13625. return false;
  13626. EVT MemVT = St->getMemoryVT();
  13627. int64_t ElementSizeBytes = MemVT.getStoreSize();
  13628. unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
  13629. if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
  13630. return false;
  13631. bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
  13632. Attribute::NoImplicitFloat);
  13633. // This function cannot currently deal with non-byte-sized memory sizes.
  13634. if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
  13635. return false;
  13636. if (!MemVT.isSimple())
  13637. return false;
  13638. // Perform an early exit check. Do not bother looking at stored values that
  13639. // are not constants, loads, or extracted vector elements.
  13640. SDValue StoredVal = peekThroughBitcasts(St->getValue());
  13641. bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  13642. bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
  13643. isa<ConstantFPSDNode>(StoredVal);
  13644. bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
  13645. StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  13646. bool IsNonTemporalStore = St->isNonTemporal();
  13647. bool IsNonTemporalLoad =
  13648. IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
  13649. if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
  13650. return false;
  13651. SmallVector<MemOpLink, 8> StoreNodes;
  13652. SDNode *RootNode;
  13653. // Find potential store merge candidates by searching through chain sub-DAG
  13654. getStoreMergeCandidates(St, StoreNodes, RootNode);
  13655. // Check if there is anything to merge.
  13656. if (StoreNodes.size() < 2)
  13657. return false;
  13658. // Sort the memory operands according to their distance from the
  13659. // base pointer.
  13660. llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
  13661. return LHS.OffsetFromBase < RHS.OffsetFromBase;
  13662. });
  13663. // Store Merge attempts to merge the lowest stores. This generally
  13664. // works out as if successful, as the remaining stores are checked
  13665. // after the first collection of stores is merged. However, in the
  13666. // case that a non-mergeable store is found first, e.g., {p[-2],
  13667. // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
  13668. // mergeable cases. To prevent this, we prune such stores from the
  13669. // front of StoreNodes here.
  13670. bool RV = false;
  13671. while (StoreNodes.size() > 1) {
  13672. size_t StartIdx = 0;
  13673. while ((StartIdx + 1 < StoreNodes.size()) &&
  13674. StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
  13675. StoreNodes[StartIdx + 1].OffsetFromBase)
  13676. ++StartIdx;
  13677. // Bail if we don't have enough candidates to merge.
  13678. if (StartIdx + 1 >= StoreNodes.size())
  13679. return RV;
  13680. if (StartIdx)
  13681. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
  13682. // Scan the memory operations on the chain and find the first
  13683. // non-consecutive store memory address.
  13684. unsigned NumConsecutiveStores = 1;
  13685. int64_t StartAddress = StoreNodes[0].OffsetFromBase;
  13686. // Check that the addresses are consecutive starting from the second
  13687. // element in the list of stores.
  13688. for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
  13689. int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
  13690. if (CurrAddress - StartAddress != (ElementSizeBytes * i))
  13691. break;
  13692. NumConsecutiveStores = i + 1;
  13693. }
  13694. if (NumConsecutiveStores < 2) {
  13695. StoreNodes.erase(StoreNodes.begin(),
  13696. StoreNodes.begin() + NumConsecutiveStores);
  13697. continue;
  13698. }
  13699. // The node with the lowest store address.
  13700. LLVMContext &Context = *DAG.getContext();
  13701. const DataLayout &DL = DAG.getDataLayout();
  13702. // Store the constants into memory as one consecutive store.
  13703. if (IsConstantSrc) {
  13704. while (NumConsecutiveStores >= 2) {
  13705. LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  13706. unsigned FirstStoreAS = FirstInChain->getAddressSpace();
  13707. unsigned FirstStoreAlign = FirstInChain->getAlignment();
  13708. unsigned LastLegalType = 1;
  13709. unsigned LastLegalVectorType = 1;
  13710. bool LastIntegerTrunc = false;
  13711. bool NonZero = false;
  13712. unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
  13713. for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
  13714. StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
  13715. SDValue StoredVal = ST->getValue();
  13716. bool IsElementZero = false;
  13717. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
  13718. IsElementZero = C->isNullValue();
  13719. else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
  13720. IsElementZero = C->getConstantFPValue()->isNullValue();
  13721. if (IsElementZero) {
  13722. if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
  13723. FirstZeroAfterNonZero = i;
  13724. }
  13725. NonZero |= !IsElementZero;
  13726. // Find a legal type for the constant store.
  13727. unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
  13728. EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
  13729. bool IsFast = false;
  13730. // Break early when size is too large to be legal.
  13731. if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
  13732. break;
  13733. if (TLI.isTypeLegal(StoreTy) &&
  13734. TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
  13735. TLI.allowsMemoryAccess(Context, DL, StoreTy,
  13736. *FirstInChain->getMemOperand(), &IsFast) &&
  13737. IsFast) {
  13738. LastIntegerTrunc = false;
  13739. LastLegalType = i + 1;
  13740. // Or check whether a truncstore is legal.
  13741. } else if (TLI.getTypeAction(Context, StoreTy) ==
  13742. TargetLowering::TypePromoteInteger) {
  13743. EVT LegalizedStoredValTy =
  13744. TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
  13745. if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
  13746. TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
  13747. TLI.allowsMemoryAccess(Context, DL, StoreTy,
  13748. *FirstInChain->getMemOperand(),
  13749. &IsFast) &&
  13750. IsFast) {
  13751. LastIntegerTrunc = true;
  13752. LastLegalType = i + 1;
  13753. }
  13754. }
  13755. // We only use vectors if the constant is known to be zero or the
  13756. // target allows it and the function is not marked with the
  13757. // noimplicitfloat attribute.
  13758. if ((!NonZero ||
  13759. TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
  13760. !NoVectors) {
  13761. // Find a legal type for the vector store.
  13762. unsigned Elts = (i + 1) * NumMemElts;
  13763. EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
  13764. if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
  13765. TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
  13766. TLI.allowsMemoryAccess(
  13767. Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
  13768. IsFast)
  13769. LastLegalVectorType = i + 1;
  13770. }
  13771. }
  13772. bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
  13773. unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
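// Illustrative example (hypothetical target where i16/i32 are legal but
// small i8 vectors are not): for four consecutive i8 constant stores, the
// loop above leaves LastLegalType == 4 and LastLegalVectorType == 1, so
// the four bytes merge into a single i32 store with UseVector == false.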
  13774. // Check if we found a legal integer type that creates a meaningful
  13775. // merge.
  13776. if (NumElem < 2) {
13777. // We know that candidate stores are in order and of correct
13778. // shape. While there is no mergeable sequence from the
13779. // beginning, one may start later in the sequence. The only
13780. // reason a merge of size N could have failed where another of
13781. // the same size would not have is if the alignment has
13782. // improved or we've dropped a non-zero value. Drop as many
13783. // candidates as we can here.
  13784. unsigned NumSkip = 1;
  13785. while (
  13786. (NumSkip < NumConsecutiveStores) &&
  13787. (NumSkip < FirstZeroAfterNonZero) &&
  13788. (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
  13789. NumSkip++;
  13790. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
  13791. NumConsecutiveStores -= NumSkip;
  13792. continue;
  13793. }
  13794. // Check that we can merge these candidates without causing a cycle.
  13795. if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
  13796. RootNode)) {
  13797. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
  13798. NumConsecutiveStores -= NumElem;
  13799. continue;
  13800. }
  13801. RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
  13802. UseVector, LastIntegerTrunc);
  13803. // Remove merged stores for next iteration.
  13804. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
  13805. NumConsecutiveStores -= NumElem;
  13806. }
  13807. continue;
  13808. }
  13809. // When extracting multiple vector elements, try to store them
  13810. // in one vector store rather than a sequence of scalar stores.
  13811. if (IsExtractVecSrc) {
  13812. // Loop on Consecutive Stores on success.
  13813. while (NumConsecutiveStores >= 2) {
  13814. LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  13815. unsigned FirstStoreAS = FirstInChain->getAddressSpace();
  13816. unsigned FirstStoreAlign = FirstInChain->getAlignment();
  13817. unsigned NumStoresToMerge = 1;
  13818. for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
  13819. // Find a legal type for the vector store.
  13820. unsigned Elts = (i + 1) * NumMemElts;
  13821. EVT Ty =
  13822. EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  13823. bool IsFast;
  13824. // Break early when size is too large to be legal.
  13825. if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
  13826. break;
  13827. if (TLI.isTypeLegal(Ty) &&
  13828. TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
  13829. TLI.allowsMemoryAccess(Context, DL, Ty,
  13830. *FirstInChain->getMemOperand(), &IsFast) &&
  13831. IsFast)
  13832. NumStoresToMerge = i + 1;
  13833. }
13834. // Check if we found a legal vector type that creates a meaningful
13835. // merge.
  13836. if (NumStoresToMerge < 2) {
13837. // We know that candidate stores are in order and of correct
13838. // shape. While there is no mergeable sequence from the
13839. // beginning, one may start later in the sequence. The only
13840. // reason a merge of size N could have failed where another of
13841. // the same size would not have is if the alignment has
13842. // improved. Drop as many candidates as we can here.
  13843. unsigned NumSkip = 1;
  13844. while (
  13845. (NumSkip < NumConsecutiveStores) &&
  13846. (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
  13847. NumSkip++;
  13848. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
  13849. NumConsecutiveStores -= NumSkip;
  13850. continue;
  13851. }
  13852. // Check that we can merge these candidates without causing a cycle.
  13853. if (!checkMergeStoreCandidatesForDependencies(
  13854. StoreNodes, NumStoresToMerge, RootNode)) {
  13855. StoreNodes.erase(StoreNodes.begin(),
  13856. StoreNodes.begin() + NumStoresToMerge);
  13857. NumConsecutiveStores -= NumStoresToMerge;
  13858. continue;
  13859. }
  13860. RV |= MergeStoresOfConstantsOrVecElts(
  13861. StoreNodes, MemVT, NumStoresToMerge, false, true, false);
  13862. StoreNodes.erase(StoreNodes.begin(),
  13863. StoreNodes.begin() + NumStoresToMerge);
  13864. NumConsecutiveStores -= NumStoresToMerge;
  13865. }
  13866. continue;
  13867. }
  13868. // Below we handle the case of multiple consecutive stores that
  13869. // come from multiple consecutive loads. We merge them into a single
  13870. // wide load and a single wide store.
  13871. // Look for load nodes which are used by the stored values.
  13872. SmallVector<MemOpLink, 8> LoadNodes;
  13873. // Find acceptable loads. Loads need to have the same chain (token factor),
13874. must not be zero-extended, volatile, or indexed, and they must be consecutive.
  13875. BaseIndexOffset LdBasePtr;
  13876. for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
  13877. StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
  13878. SDValue Val = peekThroughBitcasts(St->getValue());
  13879. LoadSDNode *Ld = cast<LoadSDNode>(Val);
  13880. BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
13881. // If this is not the first pointer that we check, its base must match.
  13882. int64_t LdOffset = 0;
  13883. if (LdBasePtr.getBase().getNode()) {
  13884. // The base ptr must be the same.
  13885. if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
  13886. break;
  13887. } else {
  13888. // Check that all other base pointers are the same as this one.
  13889. LdBasePtr = LdPtr;
  13890. }
  13891. // We found a potential memory operand to merge.
  13892. LoadNodes.push_back(MemOpLink(Ld, LdOffset));
  13893. }
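// Example (illustrative): if every stored value is a load from the same
// base pointer at the matching offset, equalBaseIndex() succeeds for each
// LdPtr and LoadNodes ends up parallel to StoreNodes; a load from an
// unrelated base stops the collection early instead.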
  13894. while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
  13895. // If we have load/store pair instructions and we only have two values,
  13896. // don't bother merging.
  13897. unsigned RequiredAlignment;
  13898. if (LoadNodes.size() == 2 &&
  13899. TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
  13900. StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
  13901. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
  13902. LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
  13903. break;
  13904. }
  13905. LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  13906. unsigned FirstStoreAS = FirstInChain->getAddressSpace();
  13907. unsigned FirstStoreAlign = FirstInChain->getAlignment();
  13908. LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
  13909. unsigned FirstLoadAlign = FirstLoad->getAlignment();
  13910. // Scan the memory operations on the chain and find the first
13911. // non-consecutive load memory address. This variable holds the index in
13912. // the load node array.
  13913. unsigned LastConsecutiveLoad = 1;
13914. // These variables refer to a size, not an index into the array.
  13915. unsigned LastLegalVectorType = 1;
  13916. unsigned LastLegalIntegerType = 1;
  13917. bool isDereferenceable = true;
  13918. bool DoIntegerTruncate = false;
  13919. StartAddress = LoadNodes[0].OffsetFromBase;
  13920. SDValue FirstChain = FirstLoad->getChain();
  13921. for (unsigned i = 1; i < LoadNodes.size(); ++i) {
  13922. // All loads must share the same chain.
  13923. if (LoadNodes[i].MemNode->getChain() != FirstChain)
  13924. break;
  13925. int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
  13926. if (CurrAddress - StartAddress != (ElementSizeBytes * i))
  13927. break;
  13928. LastConsecutiveLoad = i;
  13929. if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
  13930. isDereferenceable = false;
  13931. // Find a legal type for the vector store.
  13932. unsigned Elts = (i + 1) * NumMemElts;
  13933. EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
  13934. // Break early when size is too large to be legal.
  13935. if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
  13936. break;
  13937. bool IsFastSt, IsFastLd;
  13938. if (TLI.isTypeLegal(StoreTy) &&
  13939. TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
  13940. TLI.allowsMemoryAccess(Context, DL, StoreTy,
  13941. *FirstInChain->getMemOperand(), &IsFastSt) &&
  13942. IsFastSt &&
  13943. TLI.allowsMemoryAccess(Context, DL, StoreTy,
  13944. *FirstLoad->getMemOperand(), &IsFastLd) &&
  13945. IsFastLd) {
  13946. LastLegalVectorType = i + 1;
  13947. }
  13948. // Find a legal type for the integer store.
  13949. unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
  13950. StoreTy = EVT::getIntegerVT(Context, SizeInBits);
  13951. if (TLI.isTypeLegal(StoreTy) &&
  13952. TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
  13953. TLI.allowsMemoryAccess(Context, DL, StoreTy,
  13954. *FirstInChain->getMemOperand(), &IsFastSt) &&
  13955. IsFastSt &&
  13956. TLI.allowsMemoryAccess(Context, DL, StoreTy,
  13957. *FirstLoad->getMemOperand(), &IsFastLd) &&
  13958. IsFastLd) {
  13959. LastLegalIntegerType = i + 1;
  13960. DoIntegerTruncate = false;
  13961. // Or check whether a truncstore and extload is legal.
  13962. } else if (TLI.getTypeAction(Context, StoreTy) ==
  13963. TargetLowering::TypePromoteInteger) {
  13964. EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
  13965. if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
  13966. TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
  13967. TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
  13968. StoreTy) &&
  13969. TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
  13970. StoreTy) &&
  13971. TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
  13972. TLI.allowsMemoryAccess(Context, DL, StoreTy,
  13973. *FirstInChain->getMemOperand(),
  13974. &IsFastSt) &&
  13975. IsFastSt &&
  13976. TLI.allowsMemoryAccess(Context, DL, StoreTy,
  13977. *FirstLoad->getMemOperand(), &IsFastLd) &&
  13978. IsFastLd) {
  13979. LastLegalIntegerType = i + 1;
  13980. DoIntegerTruncate = true;
  13981. }
  13982. }
  13983. }
  13984. // Only use vector types if the vector type is larger than the integer
  13985. // type. If they are the same, use integers.
  13986. bool UseVectorTy =
  13987. LastLegalVectorType > LastLegalIntegerType && !NoVectors;
  13988. unsigned LastLegalType =
  13989. std::max(LastLegalVectorType, LastLegalIntegerType);
13990. // We add +1 here because LastConsecutiveLoad is an index into the
13991. // array, while NumElem is a count of elements.
  13992. unsigned NumElem =
  13993. std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
  13994. NumElem = std::min(LastLegalType, NumElem);
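// Worked example: with 4 consecutive stores but only the first three
// loads consecutive (LastConsecutiveLoad == 2) and LastLegalType == 4,
// NumElem = min(4, 2 + 1) = 3 load/store pairs are merged.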
  13995. if (NumElem < 2) {
13996. // We know that candidate stores are in order and of correct
13997. // shape. While there is no mergeable sequence from the
13998. // beginning, one may start later in the sequence. The only
13999. // reason a merge of size N could have failed where another of
14000. // the same size would not have is if the alignment of either
14001. // the load or store has improved. Drop as many candidates as we
14002. // can here.
  14003. unsigned NumSkip = 1;
  14004. while ((NumSkip < LoadNodes.size()) &&
  14005. (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
  14006. (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
  14007. NumSkip++;
  14008. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
  14009. LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
  14010. NumConsecutiveStores -= NumSkip;
  14011. continue;
  14012. }
  14013. // Check that we can merge these candidates without causing a cycle.
  14014. if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
  14015. RootNode)) {
  14016. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
  14017. LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
  14018. NumConsecutiveStores -= NumElem;
  14019. continue;
  14020. }
  14021. // Find if it is better to use vectors or integers to load and store
  14022. // to memory.
  14023. EVT JointMemOpVT;
  14024. if (UseVectorTy) {
  14025. // Find a legal type for the vector store.
  14026. unsigned Elts = NumElem * NumMemElts;
  14027. JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
  14028. } else {
  14029. unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
  14030. JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
  14031. }
  14032. SDLoc LoadDL(LoadNodes[0].MemNode);
  14033. SDLoc StoreDL(StoreNodes[0].MemNode);
  14034. // The merged loads are required to have the same incoming chain, so
  14035. // using the first's chain is acceptable.
  14036. SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
  14037. AddToWorklist(NewStoreChain.getNode());
  14038. MachineMemOperand::Flags LdMMOFlags =
  14039. isDereferenceable ? MachineMemOperand::MODereferenceable
  14040. : MachineMemOperand::MONone;
  14041. if (IsNonTemporalLoad)
  14042. LdMMOFlags |= MachineMemOperand::MONonTemporal;
  14043. MachineMemOperand::Flags StMMOFlags =
  14044. IsNonTemporalStore ? MachineMemOperand::MONonTemporal
  14045. : MachineMemOperand::MONone;
  14046. SDValue NewLoad, NewStore;
  14047. if (UseVectorTy || !DoIntegerTruncate) {
  14048. NewLoad =
  14049. DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
  14050. FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
  14051. FirstLoadAlign, LdMMOFlags);
  14052. NewStore = DAG.getStore(
  14053. NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
  14054. FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
  14055. } else { // This must be the truncstore/extload case
  14056. EVT ExtendedTy =
  14057. TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
  14058. NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
  14059. FirstLoad->getChain(), FirstLoad->getBasePtr(),
  14060. FirstLoad->getPointerInfo(), JointMemOpVT,
  14061. FirstLoadAlign, LdMMOFlags);
  14062. NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
  14063. FirstInChain->getBasePtr(),
  14064. FirstInChain->getPointerInfo(),
  14065. JointMemOpVT, FirstInChain->getAlignment(),
  14066. FirstInChain->getMemOperand()->getFlags());
  14067. }
  14068. // Transfer chain users from old loads to the new load.
  14069. for (unsigned i = 0; i < NumElem; ++i) {
  14070. LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
  14071. DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
  14072. SDValue(NewLoad.getNode(), 1));
  14073. }
14074. // Replace all of the stores with the new store. Recursively remove each
14075. // corresponding stored value if it is no longer used.
  14076. for (unsigned i = 0; i < NumElem; ++i) {
  14077. SDValue Val = StoreNodes[i].MemNode->getOperand(1);
  14078. CombineTo(StoreNodes[i].MemNode, NewStore);
  14079. if (Val.getNode()->use_empty())
  14080. recursivelyDeleteUnusedNodes(Val.getNode());
  14081. }
  14082. RV = true;
  14083. StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
  14084. LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
  14085. NumConsecutiveStores -= NumElem;
  14086. }
  14087. }
  14088. return RV;
  14089. }
  14090. SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
  14091. SDLoc SL(ST);
  14092. SDValue ReplStore;
  14093. // Replace the chain to avoid dependency.
  14094. if (ST->isTruncatingStore()) {
  14095. ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
  14096. ST->getBasePtr(), ST->getMemoryVT(),
  14097. ST->getMemOperand());
  14098. } else {
  14099. ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
  14100. ST->getMemOperand());
  14101. }
  14102. // Create token to keep both nodes around.
  14103. SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
  14104. MVT::Other, ST->getChain(), ReplStore);
  14105. // Make sure the new and old chains are cleaned up.
  14106. AddToWorklist(Token.getNode());
  14107. // Don't add users to work list.
  14108. return CombineTo(ST, Token, false);
  14109. }
  14110. SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
  14111. SDValue Value = ST->getValue();
  14112. if (Value.getOpcode() == ISD::TargetConstantFP)
  14113. return SDValue();
  14114. SDLoc DL(ST);
  14115. SDValue Chain = ST->getChain();
  14116. SDValue Ptr = ST->getBasePtr();
  14117. const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
  14118. // NOTE: If the original store is volatile, this transform must not increase
  14119. // the number of stores. For example, on x86-32 an f64 can be stored in one
  14120. // processor operation but an i64 (which is not legal) requires two. So the
  14121. // transform should not be done in this case.
  14122. SDValue Tmp;
  14123. switch (CFP->getSimpleValueType(0).SimpleTy) {
  14124. default:
  14125. llvm_unreachable("Unknown FP type");
  14126. case MVT::f16: // We don't do this for these yet.
  14127. case MVT::f80:
  14128. case MVT::f128:
  14129. case MVT::ppcf128:
  14130. return SDValue();
  14131. case MVT::f32:
  14132. if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
  14133. TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
  14135. Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
  14136. bitcastToAPInt().getZExtValue(), SDLoc(CFP),
  14137. MVT::i32);
  14138. return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
  14139. }
  14140. return SDValue();
  14141. case MVT::f64:
  14142. if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
  14143. ST->isSimple()) ||
  14144. TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
  14146. Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
  14147. getZExtValue(), SDLoc(CFP), MVT::i64);
  14148. return DAG.getStore(Chain, DL, Tmp,
  14149. Ptr, ST->getMemOperand());
  14150. }
  14151. if (ST->isSimple() &&
  14152. TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
  14153. // Many FP stores are not made apparent until after legalize, e.g. for
  14154. // argument passing. Since this is so common, custom legalize the
  14155. // 64-bit integer store into two 32-bit stores.
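// For example, storing double 1.0 (bit pattern 0x3FF0000000000000) emits
// Lo = 0x00000000 and Hi = 0x3FF00000, swapped on big-endian targets.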
  14156. uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
  14157. SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
  14158. SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
  14159. if (DAG.getDataLayout().isBigEndian())
  14160. std::swap(Lo, Hi);
  14161. unsigned Alignment = ST->getAlignment();
  14162. MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
  14163. AAMDNodes AAInfo = ST->getAAInfo();
  14164. SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
  14165. ST->getAlignment(), MMOFlags, AAInfo);
  14166. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
  14167. DAG.getConstant(4, DL, Ptr.getValueType()));
  14168. Alignment = MinAlign(Alignment, 4U);
  14169. SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
  14170. ST->getPointerInfo().getWithOffset(4),
  14171. Alignment, MMOFlags, AAInfo);
  14172. return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
  14173. St0, St1);
  14174. }
  14175. return SDValue();
  14176. }
  14177. }
  14178. SDValue DAGCombiner::visitSTORE(SDNode *N) {
  14179. StoreSDNode *ST = cast<StoreSDNode>(N);
  14180. SDValue Chain = ST->getChain();
  14181. SDValue Value = ST->getValue();
  14182. SDValue Ptr = ST->getBasePtr();
  14183. // If this is a store of a bit convert, store the input value if the
  14184. // resultant store does not need a higher alignment than the original.
  14185. if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
  14186. ST->isUnindexed()) {
  14187. EVT SVT = Value.getOperand(0).getValueType();
  14188. // If the store is volatile, we only want to change the store type if the
  14189. // resulting store is legal. Otherwise we might increase the number of
  14190. // memory accesses. We don't care if the original type was legal or not
  14191. // as we assume software couldn't rely on the number of accesses of an
  14192. // illegal type.
  14193. // TODO: May be able to relax for unordered atomics (see D66309)
  14194. if (((!LegalOperations && ST->isSimple()) ||
  14195. TLI.isOperationLegal(ISD::STORE, SVT)) &&
  14196. TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
  14197. DAG, *ST->getMemOperand())) {
  14198. return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
  14199. ST->getPointerInfo(), ST->getAlignment(),
  14200. ST->getMemOperand()->getFlags(), ST->getAAInfo());
  14201. }
  14202. }
  14203. // Turn 'store undef, Ptr' -> nothing.
  14204. if (Value.isUndef() && ST->isUnindexed())
  14205. return Chain;
  14206. // Try to infer better alignment information than the store already has.
  14207. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
  14208. if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
  14209. if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
  14210. SDValue NewStore =
  14211. DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
  14212. ST->getMemoryVT(), Align,
  14213. ST->getMemOperand()->getFlags(), ST->getAAInfo());
  14214. // NewStore will always be N as we are only refining the alignment
  14215. assert(NewStore.getNode() == N);
  14216. (void)NewStore;
  14217. }
  14218. }
  14219. }
  14220. // Try transforming a pair floating point load / store ops to integer
  14221. // load / store ops.
  14222. if (SDValue NewST = TransformFPLoadStorePair(N))
  14223. return NewST;
  14224. // Try transforming several stores into STORE (BSWAP).
  14225. if (SDValue Store = MatchStoreCombine(ST))
  14226. return Store;
  14227. if (ST->isUnindexed()) {
  14228. // Walk up chain skipping non-aliasing memory nodes, on this store and any
  14229. // adjacent stores.
  14230. if (findBetterNeighborChains(ST)) {
  14231. // replaceStoreChain uses CombineTo, which handled all of the worklist
  14232. // manipulation. Return the original node to not do anything else.
  14233. return SDValue(ST, 0);
  14234. }
  14235. Chain = ST->getChain();
  14236. }
  14237. // FIXME: is there such a thing as a truncating indexed store?
  14238. if (ST->isTruncatingStore() && ST->isUnindexed() &&
  14239. Value.getValueType().isInteger() &&
  14240. (!isa<ConstantSDNode>(Value) ||
  14241. !cast<ConstantSDNode>(Value)->isOpaque())) {
  14242. APInt TruncDemandedBits =
  14243. APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
  14244. ST->getMemoryVT().getScalarSizeInBits());
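// E.g., for an i32 value truncstored as i8, TruncDemandedBits is the
// 32-bit mask 0x000000FF: only those low bits ever reach memory.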
  14245. // See if we can simplify the input to this truncstore with knowledge that
  14246. // only the low bits are being used. For example:
  14247. // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
  14248. AddToWorklist(Value.getNode());
  14249. if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
  14250. return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
  14251. ST->getMemOperand());
  14252. // Otherwise, see if we can simplify the operation with
  14253. // SimplifyDemandedBits, which only works if the value has a single use.
  14254. if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
  14255. // Re-visit the store if anything changed and the store hasn't been merged
14256. // with another node (N is deleted). SimplifyDemandedBits will add Value's
  14257. // node back to the worklist if necessary, but we also need to re-visit
  14258. // the Store node itself.
  14259. if (N->getOpcode() != ISD::DELETED_NODE)
  14260. AddToWorklist(N);
  14261. return SDValue(N, 0);
  14262. }
  14263. }
  14264. // If this is a load followed by a store to the same location, then the store
  14265. // is dead/noop.
  14266. // TODO: Can relax for unordered atomics (see D66309)
  14267. if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
  14268. if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
  14269. ST->isUnindexed() && ST->isSimple() &&
  14270. // There can't be any side effects between the load and store, such as
  14271. // a call or store.
  14272. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
  14273. // The store is dead, remove it.
  14274. return Chain;
  14275. }
  14276. }
  14277. // TODO: Can relax for unordered atomics (see D66309)
  14278. if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
  14279. if (ST->isUnindexed() && ST->isSimple() &&
  14280. ST1->isUnindexed() && ST1->isSimple()) {
  14281. if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
  14282. ST->getMemoryVT() == ST1->getMemoryVT()) {
  14283. // If this is a store followed by a store with the same value to the
  14284. // same location, then the store is dead/noop.
  14285. return Chain;
  14286. }
  14287. if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
  14288. !ST1->getBasePtr().isUndef()) {
  14289. const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
  14290. const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
  14291. unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
  14292. unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
14293. // If the preceding store writes to a subset of the current store's
14294. // location and no other node is chained to that store, we can
14295. // effectively drop it. Do not remove stores to undef as they may
14296. // be used as data sinks.
  14297. if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
  14298. CombineTo(ST1, ST1->getChain());
  14299. return SDValue();
  14300. }
  14301. // If ST stores to a subset of preceding store's write set, we may be
  14302. // able to fold ST's value into the preceding stored value. As we know
  14303. // the other uses of ST1's chain are unconcerned with ST, this folding
  14304. // will not affect those nodes.
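// E.g., if ST1 stores i32 0x11223344 and ST stores i8 0xFF into its
// second byte (BitOffset == 8), the folded constant is 0x1122FF44.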
  14305. int64_t BitOffset;
  14306. if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
  14307. BitOffset)) {
  14308. SDValue ChainValue = ST1->getValue();
  14309. if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
  14310. if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
  14311. APInt Val = C1->getAPIntValue();
  14312. APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
  14313. // FIXME: Handle Big-endian mode.
  14314. if (!DAG.getDataLayout().isBigEndian()) {
  14315. Val.insertBits(InsertVal, BitOffset);
  14316. SDValue NewSDVal =
  14317. DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
  14318. C1->isTargetOpcode(), C1->isOpaque());
  14319. SDNode *NewST1 = DAG.UpdateNodeOperands(
  14320. ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
  14321. ST1->getOperand(3));
  14322. return CombineTo(ST, SDValue(NewST1, 0));
  14323. }
  14324. }
  14325. }
  14326. } // End ST subset of ST1 case.
  14327. }
  14328. }
  14329. }
  14330. // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  14331. // truncating store. We can do this even if this is already a truncstore.
  14332. if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
  14333. && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
  14334. TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
  14335. ST->getMemoryVT())) {
  14336. return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
  14337. Ptr, ST->getMemoryVT(), ST->getMemOperand());
  14338. }
  14339. // Always perform this optimization before types are legal. If the target
  14340. // prefers, also try this after legalization to catch stores that were created
  14341. // by intrinsics or other nodes.
  14342. if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
  14343. while (true) {
  14344. // There can be multiple store sequences on the same chain.
  14345. // Keep trying to merge store sequences until we are unable to do so
  14346. // or until we merge the last store on the chain.
  14347. bool Changed = MergeConsecutiveStores(ST);
  14348. if (!Changed) break;
14349. // Return N, as the merge only uses CombineTo and no worklist
14350. // clean-up is necessary.
  14351. if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
  14352. return SDValue(N, 0);
  14353. }
  14354. }
  14355. // Try transforming N to an indexed store.
  14356. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
  14357. return SDValue(N, 0);
  14358. // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  14359. //
  14360. // Make sure to do this only after attempting to merge stores in order to
  14361. // avoid changing the types of some subset of stores due to visit order,
  14362. // preventing their merging.
  14363. if (isa<ConstantFPSDNode>(ST->getValue())) {
  14364. if (SDValue NewSt = replaceStoreOfFPConstant(ST))
  14365. return NewSt;
  14366. }
  14367. if (SDValue NewSt = splitMergedValStore(ST))
  14368. return NewSt;
  14369. return ReduceLoadOpStoreWidth(N);
  14370. }
  14371. SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
  14372. const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
  14373. if (!LifetimeEnd->hasOffset())
  14374. return SDValue();
  14375. const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
  14376. LifetimeEnd->getOffset(), false);
  14377. // We walk up the chains to find stores.
  14378. SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
  14379. while (!Chains.empty()) {
  14380. SDValue Chain = Chains.back();
  14381. Chains.pop_back();
  14382. if (!Chain.hasOneUse())
  14383. continue;
  14384. switch (Chain.getOpcode()) {
  14385. case ISD::TokenFactor:
  14386. for (unsigned Nops = Chain.getNumOperands(); Nops;)
  14387. Chains.push_back(Chain.getOperand(--Nops));
  14388. break;
  14389. case ISD::LIFETIME_START:
  14390. case ISD::LIFETIME_END:
  14391. // We can forward past any lifetime start/end that can be proven not to
  14392. // alias the node.
  14393. if (!isAlias(Chain.getNode(), N))
  14394. Chains.push_back(Chain.getOperand(0));
  14395. break;
  14396. case ISD::STORE: {
14397. StoreSDNode *ST = cast<StoreSDNode>(Chain);
  14398. // TODO: Can relax for unordered atomics (see D66309)
  14399. if (!ST->isSimple() || ST->isIndexed())
  14400. continue;
  14401. const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
  14402. // If we store purely within object bounds just before its lifetime ends,
  14403. // we can remove the store.
  14404. if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
  14405. ST->getMemoryVT().getStoreSizeInBits())) {
  14406. LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
  14407. dbgs() << "\nwithin LIFETIME_END of : ";
  14408. LifetimeEndBase.dump(); dbgs() << "\n");
  14409. CombineTo(ST, ST->getChain());
  14410. return SDValue(N, 0);
  14411. }
  14412. }
  14413. }
  14414. }
  14415. return SDValue();
  14416. }
14417. /// In the store instruction sequence below, the F and I values
14418. /// are bundled together as an i64 value before being stored into memory.
14419. /// Sometimes it is more efficient to generate separate stores for F and I,
  14420. /// which can remove the bitwise instructions or sink them to colder places.
  14421. ///
  14422. /// (store (or (zext (bitcast F to i32) to i64),
  14423. /// (shl (zext I to i64), 32)), addr) -->
  14424. /// (store F, addr) and (store I, addr+4)
  14425. ///
  14426. /// Similarly, splitting for other merged store can also be beneficial, like:
  14427. /// For pair of {i32, i32}, i64 store --> two i32 stores.
  14428. /// For pair of {i32, i16}, i64 store --> two i32 stores.
  14429. /// For pair of {i16, i16}, i32 store --> two i16 stores.
  14430. /// For pair of {i16, i8}, i32 store --> two i16 stores.
  14431. /// For pair of {i8, i8}, i16 store --> two i8 stores.
  14432. ///
  14433. /// We allow each target to determine specifically which kind of splitting is
  14434. /// supported.
  14435. ///
  14436. /// The store patterns are commonly seen from the simple code snippet below
14437. /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
  14438. /// void goo(const std::pair<int, float> &);
  14439. /// hoo() {
  14440. /// ...
  14441. /// goo(std::make_pair(tmp, ftmp));
  14442. /// ...
  14443. /// }
  14444. ///
  14445. SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
  14446. if (OptLevel == CodeGenOpt::None)
  14447. return SDValue();
  14448. SDValue Val = ST->getValue();
  14449. SDLoc DL(ST);
  14450. // Match OR operand.
  14451. if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
  14452. return SDValue();
  14453. // Match SHL operand and get Lower and Higher parts of Val.
  14454. SDValue Op1 = Val.getOperand(0);
  14455. SDValue Op2 = Val.getOperand(1);
  14456. SDValue Lo, Hi;
  14457. if (Op1.getOpcode() != ISD::SHL) {
  14458. std::swap(Op1, Op2);
  14459. if (Op1.getOpcode() != ISD::SHL)
  14460. return SDValue();
  14461. }
  14462. Lo = Op2;
  14463. Hi = Op1.getOperand(0);
  14464. if (!Op1.hasOneUse())
  14465. return SDValue();
  14466. // Match shift amount to HalfValBitSize.
  14467. unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
  14468. ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
  14469. if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
  14470. return SDValue();
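// (For an i64 store, HalfValBitSize is 32, so only values of the shape
// (or Lo, (shl Hi, 32)) survive to this point.)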
14471. // Lo and Hi must be zero-extended from integer types no wider than
14472. // half the stored value (e.g., i32 or narrower for an i64 store).
  14473. if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
  14474. !Lo.getOperand(0).getValueType().isScalarInteger() ||
  14475. Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
  14476. Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
  14477. !Hi.getOperand(0).getValueType().isScalarInteger() ||
  14478. Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
  14479. return SDValue();
14480. // Use the EVT of the low and high parts before bitcast as the input
14481. // of the target query.
  14482. EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
  14483. ? Lo.getOperand(0).getValueType()
  14484. : Lo.getValueType();
  14485. EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
  14486. ? Hi.getOperand(0).getValueType()
  14487. : Hi.getValueType();
  14488. if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
  14489. return SDValue();
  14490. // Start to split store.
  14491. unsigned Alignment = ST->getAlignment();
  14492. MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
  14493. AAMDNodes AAInfo = ST->getAAInfo();
  14494. // Change the sizes of Lo and Hi's value types to HalfValBitSize.
  14495. EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
  14496. Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
  14497. Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
  14498. SDValue Chain = ST->getChain();
  14499. SDValue Ptr = ST->getBasePtr();
  14500. // Lower value store.
  14501. SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
  14502. ST->getAlignment(), MMOFlags, AAInfo);
  14503. Ptr =
  14504. DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
  14505. DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
  14506. // Higher value store.
  14507. SDValue St1 =
  14508. DAG.getStore(St0, DL, Hi, Ptr,
  14509. ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
  14510. Alignment / 2, MMOFlags, AAInfo);
  14511. return St1;
  14512. }
  14513. /// Convert a disguised subvector insertion into a shuffle:
  14514. SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
  14515. SDValue InsertVal = N->getOperand(1);
  14516. SDValue Vec = N->getOperand(0);
  14517. // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
  14518. // --> (vector_shuffle X, Y)
  14519. if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
  14520. InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
  14521. isa<ConstantSDNode>(InsertVal.getOperand(1))) {
  14522. ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
  14523. ArrayRef<int> Mask = SVN->getMask();
  14524. SDValue X = Vec.getOperand(0);
  14525. SDValue Y = Vec.getOperand(1);
  14526. // Vec's operand 0 is using indices from 0 to N-1 and
  14527. // operand 1 from N to 2N - 1, where N is the number of
  14528. // elements in the vectors.
  14529. int XOffset = -1;
  14530. if (InsertVal.getOperand(0) == X) {
  14531. XOffset = 0;
  14532. } else if (InsertVal.getOperand(0) == Y) {
  14533. XOffset = X.getValueType().getVectorNumElements();
  14534. }
  14535. if (XOffset != -1) {
  14536. SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
  14537. auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
  14538. NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
  14539. assert(NewMask[InsIndex] <
  14540. (int)(2 * Vec.getValueType().getVectorNumElements()) &&
  14541. NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
  14542. SDValue LegalShuffle =
  14543. TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
  14544. Y, NewMask, DAG);
  14545. if (LegalShuffle)
  14546. return LegalShuffle;
  14547. }
  14548. }
  14549. // insert_vector_elt V, (bitcast X from vector type), IdxC -->
  14550. // bitcast(shuffle (bitcast V), (extended X), Mask)
  14551. // Note: We do not use an insert_subvector node because that requires a
  14552. // legal subvector type.
  14553. if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
  14554. !InsertVal.getOperand(0).getValueType().isVector())
  14555. return SDValue();
  14556. SDValue SubVec = InsertVal.getOperand(0);
  14557. SDValue DestVec = N->getOperand(0);
  14558. EVT SubVecVT = SubVec.getValueType();
  14559. EVT VT = DestVec.getValueType();
  14560. unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  14561. unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
  14562. unsigned NumMaskVals = ExtendRatio * NumSrcElts;
  14563. // Step 1: Create a shuffle mask that implements this insert operation. The
  14564. // vector that we are inserting into will be operand 0 of the shuffle, so
  14565. // those elements are just 'i'. The inserted subvector is in the first
  14566. // positions of operand 1 of the shuffle. Example:
  14567. // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
  14568. SmallVector<int, 16> Mask(NumMaskVals);
  14569. for (unsigned i = 0; i != NumMaskVals; ++i) {
  14570. if (i / NumSrcElts == InsIndex)
  14571. Mask[i] = (i % NumSrcElts) + NumMaskVals;
  14572. else
  14573. Mask[i] = i;
  14574. }
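// Walk-through of the example above: NumSrcElts == 2 and ExtendRatio == 4
// give NumMaskVals == 8; only positions 4 and 5 (i / NumSrcElts ==
// InsIndex == 2) select from operand 1, yielding {0,1,2,3,8,9,6,7}.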
  14575. // Bail out if the target can not handle the shuffle we want to create.
  14576. EVT SubVecEltVT = SubVecVT.getVectorElementType();
  14577. EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
  14578. if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
  14579. return SDValue();
  14580. // Step 2: Create a wide vector from the inserted source vector by appending
  14581. // undefined elements. This is the same size as our destination vector.
  14582. SDLoc DL(N);
  14583. SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
  14584. ConcatOps[0] = SubVec;
  14585. SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
  14586. // Step 3: Shuffle in the padded subvector.
  14587. SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
  14588. SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
  14589. AddToWorklist(PaddedSubV.getNode());
  14590. AddToWorklist(DestVecBC.getNode());
  14591. AddToWorklist(Shuf.getNode());
  14592. return DAG.getBitcast(VT, Shuf);
  14593. }
  14594. SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  14595. SDValue InVec = N->getOperand(0);
  14596. SDValue InVal = N->getOperand(1);
  14597. SDValue EltNo = N->getOperand(2);
  14598. SDLoc DL(N);
  14599. // If the inserted element is an UNDEF, just use the input vector.
  14600. if (InVal.isUndef())
  14601. return InVec;
  14602. EVT VT = InVec.getValueType();
  14603. unsigned NumElts = VT.getVectorNumElements();
  14604. // Remove redundant insertions:
  14605. // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  14606. if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
  14607. InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
  14608. return InVec;
  14609. auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  14610. if (!IndexC) {
  14611. // If this is variable insert to undef vector, it might be better to splat:
  14612. // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
  14613. if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
  14614. SmallVector<SDValue, 8> Ops(NumElts, InVal);
  14615. return DAG.getBuildVector(VT, DL, Ops);
  14616. }
  14617. return SDValue();
  14618. }
  14619. // We must know which element is being inserted for folds below here.
  14620. unsigned Elt = IndexC->getZExtValue();
  14621. if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
  14622. return Shuf;
  14623. // Canonicalize insert_vector_elt dag nodes.
  14624. // Example:
  14625. // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  14626. // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  14627. //
  14628. // Do this only if the child insert_vector node has one use; also
  14629. // do this only if indices are both constants and Idx1 < Idx0.
  14630. if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
  14631. && isa<ConstantSDNode>(InVec.getOperand(2))) {
  14632. unsigned OtherElt = InVec.getConstantOperandVal(2);
  14633. if (Elt < OtherElt) {
  14634. // Swap nodes.
  14635. SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
  14636. InVec.getOperand(0), InVal, EltNo);
  14637. AddToWorklist(NewOp.getNode());
  14638. return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
  14639. VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
  14640. }
  14641. }
  14642. // If we can't generate a legal BUILD_VECTOR, exit
  14643. if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
  14644. return SDValue();
  14645. // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  14646. // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
  14647. // vector elements.
  14648. SmallVector<SDValue, 8> Ops;
  14649. // Do not combine these two vectors if the output vector will not replace
  14650. // the input vector.
  14651. if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
  14652. Ops.append(InVec.getNode()->op_begin(),
  14653. InVec.getNode()->op_end());
  14654. } else if (InVec.isUndef()) {
  14655. Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
  14656. } else {
  14657. return SDValue();
  14658. }
  14659. assert(Ops.size() == NumElts && "Unexpected vector size");
  14660. // Insert the element
  14661. if (Elt < Ops.size()) {
  14662. // All the operands of BUILD_VECTOR must have the same type;
  14663. // we enforce that here.
  14664. EVT OpVT = Ops[0].getValueType();
  14665. Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
  14666. }
  14667. // Return the new vector
  14668. return DAG.getBuildVector(VT, DL, Ops);
  14669. }
  14670. SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
  14671. SDValue EltNo,
  14672. LoadSDNode *OriginalLoad) {
  14673. assert(OriginalLoad->isSimple());
  14674. EVT ResultVT = EVE->getValueType(0);
  14675. EVT VecEltVT = InVecVT.getVectorElementType();
  14676. unsigned Align = OriginalLoad->getAlignment();
  14677. unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
  14678. VecEltVT.getTypeForEVT(*DAG.getContext()));
  14679. if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
  14680. return SDValue();
  14681. ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
  14682. ISD::NON_EXTLOAD : ISD::EXTLOAD;
  14683. if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
  14684. return SDValue();
  14685. Align = NewAlign;
  14686. SDValue NewPtr = OriginalLoad->getBasePtr();
  14687. SDValue Offset;
  14688. EVT PtrType = NewPtr.getValueType();
  14689. MachinePointerInfo MPI;
  14690. SDLoc DL(EVE);
  14691. if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
  14692. int Elt = ConstEltNo->getZExtValue();
  14693. unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
  14694. Offset = DAG.getConstant(PtrOff, DL, PtrType);
  14695. MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  14696. } else {
  14697. Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
  14698. Offset = DAG.getNode(
  14699. ISD::MUL, DL, PtrType, Offset,
  14700. DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
  14701. // Discard the pointer info except the address space because the memory
  14702. // operand can't represent this new access since the offset is variable.
  14703. MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
  14704. }
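// E.g., extracting constant element 3 of a v4f32 load above gives
// PtrOff = 32 * 3 / 8 = 12, so the scalar load reads from $addr + 12.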
  14705. NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
  14706. // The replacement we need to do here is a little tricky: we need to
  14707. // replace an extractelement of a load with a load.
  14708. // Use ReplaceAllUsesOfValuesWith to do the replacement.
  14709. // Note that this replacement assumes that the extractvalue is the only
  14710. // use of the load; that's okay because we don't want to perform this
  14711. // transformation in other cases anyway.
  14712. SDValue Load;
  14713. SDValue Chain;
  14714. if (ResultVT.bitsGT(VecEltVT)) {
  14715. // If the result type of vextract is wider than the load, then issue an
  14716. // extending load instead.
  14717. ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
  14718. VecEltVT)
  14719. ? ISD::ZEXTLOAD
  14720. : ISD::EXTLOAD;
  14721. Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
  14722. OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
  14723. Align, OriginalLoad->getMemOperand()->getFlags(),
  14724. OriginalLoad->getAAInfo());
  14725. Chain = Load.getValue(1);
  14726. } else {
  14727. Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
  14728. MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
  14729. OriginalLoad->getAAInfo());
  14730. Chain = Load.getValue(1);
  14731. if (ResultVT.bitsLT(VecEltVT))
  14732. Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
  14733. else
  14734. Load = DAG.getBitcast(ResultVT, Load);
  14735. }
  14736. WorklistRemover DeadNodes(*this);
  14737. SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  14738. SDValue To[] = { Load, Chain };
  14739. DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  14740. // Since we're explicitly calling ReplaceAllUses, add the new node to the
  14741. // worklist explicitly as well.
  14742. AddToWorklist(Load.getNode());
  14743. AddUsersToWorklist(Load.getNode()); // Add users too
  14744. // Make sure to revisit this node to clean it up; it will usually be dead.
  14745. AddToWorklist(EVE);
  14746. ++OpsNarrowed;
  14747. return SDValue(EVE, 0);
  14748. }
  14749. /// Transform a vector binary operation into a scalar binary operation by moving
  14750. /// the math/logic after an extract element of a vector.
  14751. static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
  14752. bool LegalOperations) {
  14753. const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  14754. SDValue Vec = ExtElt->getOperand(0);
  14755. SDValue Index = ExtElt->getOperand(1);
  14756. auto *IndexC = dyn_cast<ConstantSDNode>(Index);
  14757. if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
  14758. Vec.getNode()->getNumValues() != 1)
  14759. return SDValue();
  14760. // Targets may want to avoid this to prevent an expensive register transfer.
  14761. if (!TLI.shouldScalarizeBinop(Vec))
  14762. return SDValue();
  14763. // Extracting an element of a vector constant is constant-folded, so this
  14764. // transform is just replacing a vector op with a scalar op while moving the
  14765. // extract.
  14766. SDValue Op0 = Vec.getOperand(0);
  14767. SDValue Op1 = Vec.getOperand(1);
  14768. if (isAnyConstantBuildVector(Op0, true) ||
  14769. isAnyConstantBuildVector(Op1, true)) {
  14770. // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
  14771. // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
  14772. SDLoc DL(ExtElt);
  14773. EVT VT = ExtElt->getValueType(0);
  14774. SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
  14775. SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
  14776. return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
  14777. }
  14778. return SDValue();
  14779. }
  14780. SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  14781. SDValue VecOp = N->getOperand(0);
  14782. SDValue Index = N->getOperand(1);
  14783. EVT ScalarVT = N->getValueType(0);
  14784. EVT VecVT = VecOp.getValueType();
  14785. if (VecOp.isUndef())
  14786. return DAG.getUNDEF(ScalarVT);
  14787. // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  14788. //
  14789. // This only really matters if the index is non-constant since other combines
  14790. // on the constant elements already work.
  14791. SDLoc DL(N);
  14792. if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
  14793. Index == VecOp.getOperand(2)) {
  14794. SDValue Elt = VecOp.getOperand(1);
  14795. return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
  14796. }
14797. // (vextract (scalar_to_vector val), 0) -> val
  14798. if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
  14799. // Check if the result type doesn't match the inserted element type. A
  14800. // SCALAR_TO_VECTOR may truncate the inserted element and the
  14801. // EXTRACT_VECTOR_ELT may widen the extracted vector.
  14802. SDValue InOp = VecOp.getOperand(0);
  14803. if (InOp.getValueType() != ScalarVT) {
  14804. assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
  14805. return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
  14806. }
  14807. return InOp;
  14808. }
  14809. // extract_vector_elt of out-of-bounds element -> UNDEF
  14810. auto *IndexC = dyn_cast<ConstantSDNode>(Index);
  14811. unsigned NumElts = VecVT.getVectorNumElements();
  14812. if (IndexC && IndexC->getAPIntValue().uge(NumElts))
  14813. return DAG.getUNDEF(ScalarVT);
  14814. // extract_vector_elt (build_vector x, y), 1 -> y
  14815. if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
  14816. TLI.isTypeLegal(VecVT) &&
  14817. (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
  14818. SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
  14819. EVT InEltVT = Elt.getValueType();
  14820. // Sometimes build_vector's scalar input types do not match result type.
  14821. if (ScalarVT == InEltVT)
  14822. return Elt;
  14823. // TODO: It may be useful to truncate if free if the build_vector implicitly
  14824. // converts.
  14825. }
  14826. // TODO: These transforms should not require the 'hasOneUse' restriction, but
  14827. // there are regressions on multiple targets without it. We can end up with a
  14828. // mess of scalar and vector code if we reduce only part of the DAG to scalar.
  14829. if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
  14830. VecOp.hasOneUse()) {
14831. // The vector index of the LSBs of the source depends on the endianness.
  14832. bool IsLE = DAG.getDataLayout().isLittleEndian();
  14833. unsigned ExtractIndex = IndexC->getZExtValue();
  14834. // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
  14835. unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
  14836. SDValue BCSrc = VecOp.getOperand(0);
  14837. if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
  14838. return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
  14839. if (LegalTypes && BCSrc.getValueType().isInteger() &&
  14840. BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
  14841. // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
  14842. // trunc i64 X to i32
  14843. SDValue X = BCSrc.getOperand(0);
  14844. assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
  14845. "Extract element and scalar to vector can't change element type "
  14846. "from FP to integer.");
  14847. unsigned XBitWidth = X.getValueSizeInBits();
  14848. unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
  14849. BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
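// E.g., for v2i64 bitcast to v4i32 with XBitWidth == 64 and 32-bit
// vector elements, BCTruncElt is 0 on little-endian targets and 1 on
// big-endian: the element that holds X's low bits.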
  14850. // An extract element return value type can be wider than its vector
  14851. // operand element type. In that case, the high bits are undefined, so
  14852. // it's possible that we may need to extend rather than truncate.
  14853. if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
  14854. assert(XBitWidth % VecEltBitWidth == 0 &&
  14855. "Scalar bitwidth must be a multiple of vector element bitwidth");
  14856. return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
  14857. }
  14858. }
  14859. }
  14860. if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
  14861. return BO;
  14862. // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  14863. // We only perform this optimization before the op legalization phase because
  14864. // we may introduce new vector instructions which are not backed by TD
  14865. // patterns. For example on AVX, extracting elements from a wide vector
  14866. // without using extract_subvector. However, if we can find an underlying
  14867. // scalar value, then we can always use that.
  if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
    auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);

    // Find the new index to extract from.
    int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(ScalarVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < (int)NumElts) {
      SVInVec = VecOp.getOperand(0);
    } else {
      SVInVec = VecOp.getOperand(1);
      OrigElt -= NumElts;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != ScalarVT) {
        assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
      }
      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.
    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
                         DAG.getConstant(OrigElt, DL, IndexTy));
    }
  }

  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
  // simplify it based on the (valid) extraction indices.
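  // For example (illustrative), if a v4i32 source is only consumed by
  // extracts at constant indices 0 and 2, lanes 1 and 3 are dead, and the
  // producer of the vector may be simplified accordingly.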
  if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Use->getOperand(0) == VecOp &&
               isa<ConstantSDNode>(Use->getOperand(1));
      })) {
    APInt DemandedElts = APInt::getNullValue(NumElts);
    for (SDNode *Use : VecOp->uses()) {
      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
      if (CstElt->getAPIntValue().ult(NumElts))
        DemandedElts.setBit(CstElt->getZExtValue());
    }
    if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
      // We simplified the vector operand of this extract element. If this
      // extract is not dead, visit it again so it is folded properly.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // Everything under here is trying to match an extract of a loaded value.
  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  bool BCNumEltsChanged = false;
  EVT ExtVT = VecVT.getVectorElementType();
  EVT LVT = ExtVT;
  if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
    return SDValue();

  if (VecOp.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!VecOp.hasOneUse())
      return SDValue();

    EVT BCVT = VecOp.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (NumElts != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    VecOp = VecOp.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // extract (vector load $addr), i --> load $addr + i * size
  if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
      ISD::isNormalLoad(VecOp.getNode()) &&
      !Index->hasPredecessor(VecOp.getNode())) {
    auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
    if (VecLoad && VecLoad->isSimple())
      return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations || !IndexC)
    return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
  int Elt = IndexC->getZExtValue();
  LoadSDNode *LN0 = nullptr;
  if (ISD::isNormalLoad(VecOp.getNode())) {
    LN0 = cast<LoadSDNode>(VecOp);
  } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
             VecOp.getOperand(0).getValueType() == ExtVT &&
             ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
    // Don't duplicate a load with other uses.
    if (!VecOp.hasOneUse())
      return SDValue();

    LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
  }
  if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
    // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
    // =>
    // (load $addr+1*size)

    // Don't duplicate a load with other uses.
    if (!VecOp.hasOneUse())
      return SDValue();

    // If the bit convert changed the number of elements, it is unsafe
    // to examine the mask.
    if (BCNumEltsChanged)
      return SDValue();

    // Select the input vector, guarding against out of range extract vector.
    int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
    VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);

    if (VecOp.getOpcode() == ISD::BITCAST) {
      // Don't duplicate a load with other uses.
      if (!VecOp.hasOneUse())
        return SDValue();

      VecOp = VecOp.getOperand(0);
    }
    if (ISD::isNormalLoad(VecOp.getNode())) {
      LN0 = cast<LoadSDNode>(VecOp);
      Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
      Index = DAG.getConstant(Elt, DL, Index.getValueType());
    }
  }

  // Make sure we found a non-volatile load and the extractelement is
  // the only use.
  if (!LN0 || !LN0->hasNUsesOfValue(1, 0) || !LN0->isSimple())
    return SDValue();

  // If Idx was -1 above, Elt is going to be -1, so just return undef.
  if (Elt == -1)
    return DAG.getUNDEF(LVT);

  return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
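// For example (illustrative, little-endian):
//   (v4i32 build_vector (zext i16:a), (zext i16:b), (zext i16:c), (zext i16:d))
//   --> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))
// With all-ANY_EXTEND inputs, the filler elements are undef instead of zero.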
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.isUndef()) continue;

    bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple incoming types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                    isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, DL, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.isUndef()) && "Invalid cast opcode");
    SDValue In;
    if (Cast.isUndef())
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));
    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");

  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT) ||
      (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
    return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getBitcast(VT, BV);
}
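/// Build a shuffle that produces the elements of a BUILD_VECTOR drawn from
/// two of the input vectors found by reduceBuildVecToShuffle. VectorMask maps
/// each output element to an input vector number: elements mapped to LeftIdx
/// are taken from VecIn1, and elements mapped to LeftIdx + 1 from VecIn2.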
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx, bool DidSplitVec) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // If we artificially split a vector in two already, then the offsets in the
  // operands will all be based off of VecIn1, even those in VecIn2.
  unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }
  // The type of the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}

static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");

  // First, determine where the build vector is not undef.
  // TODO: We could extend this to handle zero elements as well as undefs.
  int NumBVOps = BV->getNumOperands();
  int ZextElt = -1;
  for (int i = 0; i != NumBVOps; ++i) {
    SDValue Op = BV->getOperand(i);
    if (Op.isUndef())
      continue;
    if (ZextElt == -1)
      ZextElt = i;
    else
      return SDValue();
  }
  // Bail out if there's no non-undef element.
  if (ZextElt == -1)
    return SDValue();

  // The build vector contains some number of undef elements and exactly
  // one other element. That other element must be a zero-extended scalar
  // extracted from a vector at a constant index to turn this into a shuffle.
  // Also, require that the build vector does not implicitly truncate/extend
  // its elements.
  // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
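  // For example (illustrative, little-endian):
  //   (v2i64 build_vector undef, (i64 zext (i32 extractelt v4i32:V, 1)))
  // can become
  //   (v2i64 bitcast (v4i32 vector_shuffle<u,u,1,4> V, zero))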
  EVT VT = BV->getValueType(0);
  SDValue Zext = BV->getOperand(ZextElt);
  if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
      Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

  // The zero-extend must be a multiple of the source size, and we must be
  // building a vector of the same size as the source of the extract element.
  SDValue Extract = Zext.getOperand(0);
  unsigned DestSize = Zext.getValueSizeInBits();
  unsigned SrcSize = Extract.getValueSizeInBits();
  if (DestSize % SrcSize != 0 ||
      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
    return SDValue();

  // Create a shuffle mask that will combine the extracted element with zeros
  // and undefs.
  int ZextRatio = DestSize / SrcSize;
  int NumMaskElts = NumBVOps * ZextRatio;
  SmallVector<int, 32> ShufMask(NumMaskElts, -1);
  for (int i = 0; i != NumMaskElts; ++i) {
    if (i / ZextRatio == ZextElt) {
      // The low bits of the (potentially translated) extracted element map to
      // the source vector. The high bits map to zero. We will use a zero vector
      // as the 2nd source operand of the shuffle, so use the 1st element of
      // that vector (mask value is number-of-elements) for the high bits.
      if (i % ZextRatio == 0)
        ShufMask[i] = Extract.getConstantOperandVal(1);
      else
        ShufMask[i] = NumMaskElts;
    }

    // Undef elements of the build vector remain undef because we initialize
    // the shuffle mask with -1.
  }

  // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
  // bitcast (shuffle V, ZeroVec, VectorMask)
  SDLoc DL(BV);
  EVT VecVT = Extract.getOperand(0).getValueType();
  SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
                                             ZeroVec, ShufMask, DAG);
  if (!Shuf)
    return SDValue();

  return DAG.getBitcast(VT, Shuf);
}

// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
    return V;

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.
  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
    return SDValue();
  // If all the operands of BUILD_VECTOR extract from the same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
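  // For example (illustrative), a 4-element build_vector whose operands
  // extract elements 0, 1, and 9 of a v16i32 source has MaxIndex = 9 and
  // NearestPow2 = 16, so the source may be split into two v8i32 halves when
  // v8i32 is legal.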
  bool DidSplitVec = false;
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);
        DidSplitVec = true;

        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.
  // TODO: Should this fire if some of the input vectors have illegal types
  // (like it does now), or should we let legalization run its course first?
  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx, DidSplitVec))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
// TODO: Support sign extend?
// TODO: Allow undef elements?
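// For example (illustrative):
//   (v4i32 build_vector (zext (extractelt v8i16:X, 4)),
//                       (zext (extractelt X, 5)),
//                       (zext (extractelt X, 6)),
//                       (zext (extractelt X, 7)))
//   --> (v4i32 zero_extend (v4i16 extract_subvector X, 4))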
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
  if (LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);

  bool FoundZeroExtend = false;
  SDValue Op0 = N->getOperand(0);
  auto checkElem = [&](SDValue Op) -> int64_t {
    unsigned Opc = Op.getOpcode();
    FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
    if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
        return C->getZExtValue();
    return -1;
  };

  // Make sure the first element matches
  // (zext (extract_vector_elt X, C))
  int64_t Offset = checkElem(Op0);
  if (Offset < 0)
    return SDValue();

  unsigned NumElems = N->getNumOperands();
  SDValue In = Op0.getOperand(0).getOperand(0);
  EVT InSVT = In.getValueType().getScalarType();
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);

  // Don't create an illegal input type after type legalization.
  if (LegalTypes && !TLI.isTypeLegal(InVT))
    return SDValue();

  // Ensure all the elements come from the same vector and are adjacent.
  for (unsigned i = 1; i != NumElems; ++i) {
    if ((Offset + i) != checkElem(N->getOperand(i)))
      return SDValue();
  }

  SDLoc DL(N);
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                   Op0.getOperand(0).getOperand(1));
  return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
                     VT, In);
}

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //   (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcasts(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto *CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}
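// Fold CONCAT_VECTORS of bitcast scalars (or undef) into a single wider
// BUILD_VECTOR, e.g. when v2i32 is not a legal type (illustrative):
//   (v4i32 concat_vectors (v2i32 bitcast i64:x), (v2i32 bitcast i64:y))
//   --> (v4i32 bitcast (v2i64 build_vector x, y))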
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (AnyInteger) {
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.isUndef())
          Op = ScalarUndef;
        else
          Op = DAG.getBitcast(SVT, Op);
      }
    }
  }

  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}

// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
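// For example (illustrative):
//   (v4i32 concat_vectors (v2i32 extract_subvector v4i32:A, 2),
//                         (v2i32 extract_subvector v4i32:B, 0))
//   --> (v4i32 vector_shuffle<2,3,4,5> A, B)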
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    Op = peekThroughBitcasts(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();
    ExtVec = peekThroughBitcasts(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                                     DAG.getBitcast(VT, SV1), Mask, DAG);
}

SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // If the input is a concat_vectors, just make a larger concat by padding
    // with smaller undefs.
    if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
      unsigned NumOps = N->getNumOperands() * In.getNumOperands();
      SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
      Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
    }

    SDValue Scalar = peekThroughOneUseBitcasts(In);

    // concat_vectors(scalar_to_vector(scalar), undef) ->
    //     scalar_to_vector(scalar)
    if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        Scalar.hasOneUse()) {
      EVT SVT = Scalar.getValueType().getVectorElementType();
      if (SVT == Scalar.getOperand(0).getValueType())
        Scalar = Scalar.getOperand(0);
    }

    // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
    if (!Scalar.getValueType().isVector()) {
      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar.getValueType();

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTOR operands are built from integers, they may have
      // different operand types. Get the smallest type and truncate all
      // operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;
  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
  // operands and look for CONCAT operations that place the incoming vectors
  // at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    unsigned IdentityIndex = i * PartNumElem;
    if (CS->getAPIntValue() != IdentityIndex)
      return SDValue();
  }

  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}

// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
// if the subvector can be sourced for free.
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
  if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
      V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
    return V.getOperand(1);
  }
  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
  if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
      V.getOperand(0).getValueType() == SubVT &&
      (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
    uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
    return V.getOperand(SubIdx);
  }
  return SDValue();
}

static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
                                              SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue BinOp = Extract->getOperand(0);
  unsigned BinOpcode = BinOp.getOpcode();
  if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
    return SDValue();

  EVT VecVT = BinOp.getValueType();
  SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
  if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
    return SDValue();

  SDValue Index = Extract->getOperand(1);
  EVT SubVT = Extract->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
    return SDValue();

  SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
  SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);

  // TODO: We could handle the case where only 1 operand is being inserted by
  //       creating an extract of the other operand, but that requires checking
  //       number of uses and/or costs.
  if (!Sub0 || !Sub1)
    return SDValue();

  // We are inserting both operands of the wide binop only to extract back
  // to the narrow vector size. Eliminate all of the insert/extract:
  // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
  return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
                     BinOp->getFlags());
}

/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
    return V;

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndexC)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
  unsigned BOpcode = BinOp.getOpcode();
  if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
    return SDValue();

  // The binop must be a vector type, so we can extract some fraction of it.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  EVT VT = Extract->getValueType(0);
  unsigned ExtractIndex = ExtractIndexC->getZExtValue();
  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
         "Extract index is not a multiple of the vector length.");

  // Bail out if this is not a proper multiple width extraction.
  unsigned WideWidth = WideBVT.getSizeInBits();
  unsigned NarrowWidth = VT.getSizeInBits();
  if (WideWidth % NarrowWidth != 0)
    return SDValue();

  // Bail out if we are extracting a fraction of a single operation. This can
  // occur because we potentially looked through a bitcast of the binop.
  unsigned NarrowingRatio = WideWidth / NarrowWidth;
  unsigned WideNumElts = WideBVT.getVectorNumElements();
  if (WideNumElts % NarrowingRatio != 0)
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideNumElts / NarrowingRatio);
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // If extraction is cheap, we don't need to look at the binop operands
  // for concat ops. The narrow binop alone makes this transform profitable.
  // We can't just reuse the original extract index operand because we may have
  // bitcasted.
  unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
  if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
      BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
    // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
    SDLoc DL(Extract);
    SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
    SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(0), NewExtIndex);
    SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(1), NewExtIndex);
    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
                                      BinOp.getNode()->getFlags());
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  if (NarrowingRatio != 2)
    return SDValue();

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
    if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
      return V.getOperand(ConcatOpNum);
    return SDValue();
  };
  SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
  SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));

  if (SubVecL || SubVecR) {
    // If a binop operand was not the result of a concat, we must extract a
    // half-sized operand for our new narrow binop:
    // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
    // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
    // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
    SDLoc DL(Extract);
    SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
    SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(0), IndexC);
    SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
                        : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                      BinOp.getOperand(1), IndexC);
    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  return SDValue();
}

/// If we are extracting a subvector from a wide vector load, convert to a
/// narrow load to eliminate the extraction:
/// (extract_subvector (load wide vector)) --> (load narrow vector)
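/// For example (illustrative, little-endian):
///   (v2f32 extract_subvector (v8f32 load %ptr), 4)
///   --> (v2f32 load %ptr + 16 bytes)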
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Add support for big-endian. The offset calculation must be adjusted.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();

  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || !ExtIdx)
    return SDValue();

  // Allow targets to opt-out.
  EVT VT = Extract->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
    return SDValue();

  // The narrow load will be offset from the base address of the old load if
  // we are extracting from something besides index 0 (little-endian).
  SDLoc DL(Extract);
  SDValue BaseAddr = Ld->getOperand(1);
  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();

  // TODO: Use "BaseIndexOffset" to make this more effective.
  SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
                                                   VT.getStoreSize());
  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
  return NewLd;
}

SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  // Extract from UNDEF is UNDEF.
  if (V.isUndef())
    return DAG.getUNDEF(NVT);

  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
    if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
      return NarrowLoad;

  // Combine an extract of an extract into a single extract_subvector.
  // ext (ext X, C), 0 --> ext X, C
  SDValue Index = N->getOperand(1);
  if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
    if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
                                    V.getConstantOperandVal(1)) &&
        TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
                         V.getOperand(1));
    }
  }

  // Try to move vector bitcast after extract_subv by scaling extraction index:
  // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
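  // For example (illustrative):
  //   (v2i32 extract_subvector (v4i32 bitcast v8i16:X), 2)
  //   --> (v2i32 bitcast (v4i16 extract_subvector X, 4))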
  if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
      V.getOperand(0).getValueType().isVector()) {
    SDValue SrcOp = V.getOperand(0);
    EVT SrcVT = SrcOp.getValueType();
    unsigned SrcNumElts = SrcVT.getVectorNumElements();
    unsigned DestNumElts = V.getValueType().getVectorNumElements();
    if ((SrcNumElts % DestNumElts) == 0) {
      unsigned SrcDestRatio = SrcNumElts / DestNumElts;
      unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
      EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                                      NewExtNumElts);
      if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
        unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
        SDLoc DL(N);
        SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
        SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
                                         V.getOperand(0), NewIndex);
        return DAG.getBitcast(NVT, NewExtract);
      }
    }
    // TODO - handle (DestNumElts % SrcNumElts) == 0
  }

  // Combine:
  //    (extract_subvec (concat V1, V2, ...), i)
  // Into:
  //    Vi if possible
  // Only operand 0 is checked as 'concat' assumes all inputs of the same
  // type.
  if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
      V.getOperand(0).getValueType() == NVT) {
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  V = peekThroughBitcasts(V);
  // If the input is a build vector, try to make a smaller build vector.
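  // E.g. (v2i32 extract_subv (v4i32 build_vector a, b, c, d), 2) only needs
  // the last two scalars: (v2i32 build_vector c, d).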
  if (V.getOpcode() == ISD::BUILD_VECTOR) {
    if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
      EVT InVT = V.getValueType();
      unsigned ExtractSize = NVT.getSizeInBits();
      unsigned EltSize = InVT.getScalarSizeInBits();
      // Only do this if we won't split any elements.
      if (ExtractSize % EltSize == 0) {
        unsigned NumElems = ExtractSize / EltSize;
        EVT EltVT = InVT.getVectorElementType();
        EVT ExtractVT = NumElems == 1 ? EltVT
                                      : EVT::getVectorVT(*DAG.getContext(),
                                                         EltVT, NumElems);
        if ((Level < AfterLegalizeDAG ||
             (NumElems == 1 ||
              TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
            (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
          unsigned IdxVal = IdxC->getZExtValue();
          IdxVal *= NVT.getScalarSizeInBits();
          IdxVal /= EltSize;
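          // The index is now renormalized from NVT scalar units to InVT
          // scalar units, e.g. index 1 of a v2i64 extract from a v8i16
          // build_vector becomes 1 * 64 / 16 = 4.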
          if (NumElems == 1) {
            SDValue Src = V->getOperand(IdxVal);
            if (EltVT != Src.getValueType())
              Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
            return DAG.getBitcast(NVT, Src);
          }

          // Extract the pieces from the original build_vector.
          SDValue BuildVec = DAG.getBuildVector(
              ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
          return DAG.getBitcast(NVT, BuildVec);
        }
      }
    }
  }

  if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same size.
    EVT SmallVT = V.getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT))
      return SDValue();

    // Only handle cases where both indexes are constants.
    auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
    auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
    if (InsIdx && ExtIdx) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal or bit offsets are equal => V2
      //    otherwise => (extract_subvec V1, ExtIdx)
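      // E.g. with NVT = v4i32 and SmallVT = v2i64, InsIdx 1 and ExtIdx 2 both
      // name bit offset 64, so the extract folds to a bitcast of V2.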
      if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
        return DAG.getBitcast(NVT, V.getOperand(1));
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
          DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
          Index);
    }
  }

  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
    return NarrowBOp;

  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
/// followed by concatenation. Narrow vector ops may have better performance
/// than wide ops, and this can unlock further narrowing of other vector ops.
/// Targets can invert this transform later if it is not profitable.
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
                                         SelectionDAG &DAG) {
  SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
  if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
      N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
      !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
    return SDValue();

  // Split the wide shuffle mask into halves. Any mask element that is accessing
  // operand 1 is offset down to account for narrowing of the vectors.
  ArrayRef<int> Mask = Shuf->getMask();
  EVT VT = Shuf->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  unsigned HalfNumElts = NumElts / 2;
  SmallVector<int, 16> Mask0(HalfNumElts, -1);
  SmallVector<int, 16> Mask1(HalfNumElts, -1);
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] == -1)
      continue;
    int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
    if (i < HalfNumElts)
      Mask0[i] = M;
    else
      Mask1[i - HalfNumElts] = M;
  }
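  // E.g. with v8i32 inputs, wide mask element 9 selects lane 1 of Y; in the
  // narrowed v4i32 shuffles Y's lanes start at 4, so 9 becomes 9 - 4 = 5.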
  // Ask the target if this is a valid transform.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
                                HalfNumElts);
  if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
      !TLI.isShuffleMaskLegal(Mask1, HalfVT))
    return SDValue();

  // shuffle (concat X, undef), (concat Y, undef), Mask -->
  // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
  SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
  SDLoc DL(Shuf);
  SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
  SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
}

// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
// turn a shuffle of a single concat into a simpler shuffle followed by a
// concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  ArrayRef<int> Mask = SVN->getMask();

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  auto IsUndefMaskElt = [](int i) { return i == -1; };

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
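  // E.g. a v4i32 shuffle (concat A, B), undef, <2,0,u,u> only uses the low
  // half, so it becomes concat (shuffle A, B, <2,0>), undef.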
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
                   IsUndefMaskElt)) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
                              N0.getOperand(1),
                              Mask.slice(0, NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
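  // E.g. shuffle (concat A, B), (concat C, D), <4,5,6,7,12,13,14,15> with
  // v4i32 pieces copies B whole, then D whole: concat B, D.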
  for (unsigned I = 0; I != NumConcats; ++I) {
    unsigned Begin = I * NumElemsPerConcat;
    ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);

    // Make sure we're dealing with a copy.
    if (llvm::all_of(SubMask, IsUndefMaskElt)) {
      Ops.push_back(DAG.getUNDEF(ConcatVT));
      continue;
    }

    int OpIdx = -1;
    for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
      if (IsUndefMaskElt(SubMask[i]))
        continue;
      if ((SubMask[i] % (int)NumElemsPerConcat) != i)
        return SDValue();
      int EltOpIdx = SubMask[i] / NumElemsPerConcat;
      if (0 <= OpIdx && EltOpIdx != OpIdx)
        return SDValue();
      OpIdx = EltOpIdx;
    }
    assert(0 <= OpIdx && "Unknown concat_vectors op");
    if (OpIdx < (int)N0.getNumOperands())
      Ops.push_back(N0.getOperand(OpIdx));
    else
      Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}

// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load. A BUILD_VECTOR where each
// element is identical is a splat. A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);
  if (!N0->hasOneUse())
    return SDValue();

  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    if (!N1->hasOneUse())
      return SDValue();

    bool N0AnyConst = isAnyConstantBuildVector(N0);
    bool N1AnyConst = isAnyConstantBuildVector(N1);
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        SDValue Op0 = S.getOperand(0);
        Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }
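  // E.g. shuffle (build_vector a, b, c, d), (scalar_to_vector x), <0,4,2,u>
  // gathers the scalars a, x, c, undef for the new build_vector.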
  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}

// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
                                            SelectionDAG &DAG,
                                            const TargetLowering &TLI,
                                            bool LegalOperations) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  ArrayRef<int> Mask = SVN->getMask();
  SDValue N0 = SVN->getOperand(0);

  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
        continue;
      return false;
    }
    return true;
  };
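  // E.g. at Scale = 2, mask <0,u,1,u> passes: the even lanes hold consecutive
  // source elements 0 and 1, and the odd lanes become the extended (undef)
  // high bits of each widened element.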
  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
  // power-of-2 extensions as they are the most likely.
  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Check for non power of 2 vector sizes
    if (NumElts % Scale != 0)
      continue;
    if (!isAnyExtend(Scale))
      continue;

    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
    // Never create an illegal type. Only create unsupported operations if we
    // are pre-legalization.
    if (TLI.isTypeLegal(OutVT))
      if (!LegalOperations ||
          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
        return DAG.getBitcast(VT,
                              DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
                                          SDLoc(SVN), OutVT, N0));
  }

  return SDValue();
}

// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));

  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
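  // E.g. an any_extend_vector_inreg from v8i16 to v4i32 has
  // ExtScale = 32 / 16 = 2.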
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  if (EltSizeInBits != ExtSrcSizeInBits)
    return SDValue();

  // We can remove *extend_vector_inreg only if the truncation happens at
  // the same scale as the extension.
  if (isTruncate(ExtScale))
    return DAG.getBitcast(VT, N00);

  return SDValue();
}

// Combine shuffles of splat-shuffles of the form:
// shuffle (shuffle V, undef, splat-mask), undef, M
// If splat-mask contains undef elements, we need to be careful about
// introducing undef's in the folded mask which are not the result of composing
// the masks of the shuffles.
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
                                        SelectionDAG &DAG) {
  if (!Shuf->getOperand(1).isUndef())
    return SDValue();
  auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!Splat || !Splat->isSplat())
    return SDValue();

  ArrayRef<int> ShufMask = Shuf->getMask();
  ArrayRef<int> SplatMask = Splat->getMask();
  assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");

  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
  // every undef mask element in the splat-shuffle has a corresponding undef
  // element in the user-shuffle's mask or if the composition of mask elements
  // would result in undef.
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing to the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
  //   In this case the composition of masks yields SplatMask, so it's ok to
  //   simplify to the splat-shuffle.
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  //   In this case the composed mask includes all undef elements of SplatMask
  //   and in addition sets element zero to undef. It is safe to simplify to
  //   the splat-shuffle.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
    return Shuf->getOperand(0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : ShufMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}

/// If the shuffle mask is taking exactly one element from the first vector
/// operand and passing through all other elements from the second vector
/// operand, return the index of the mask element that is choosing an element
/// from the first operand. Otherwise, return -1.
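/// E.g. for 4-element operands, Mask = <4,5,0,7> returns 2: mask element 2
/// selects element 0 of operand 0 while every other lane passes through the
/// corresponding lane of operand 1.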
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
  int MaskSize = Mask.size();
  int EltFromOp0 = -1;
  // TODO: This does not match if there are undef elements in the shuffle mask.
  // Should we ignore undefs in the shuffle mask instead? The trade-off is
  // removing an instruction (a shuffle), but losing the knowledge that some
  // vector lanes are not needed.
  for (int i = 0; i != MaskSize; ++i) {
    if (Mask[i] >= 0 && Mask[i] < MaskSize) {
      // We're looking for a shuffle of exactly one element from operand 0.
      if (EltFromOp0 != -1)
        return -1;
      EltFromOp0 = i;
    } else if (Mask[i] != i + MaskSize) {
      // Nothing from operand 1 can change lanes.
      return -1;
    }
  }

  return EltFromOp0;
}

/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask.
    std::swap(Op0, Op1);
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
                                        Op0.getOperand(2).getValueType());
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}

/// If we have a unary shuffle of a shuffle, see if it can be folded away
/// completely. This has the potential to lose undef knowledge because the first
/// shuffle may not have an undef mask element where the second one does. So
/// only call this after doing simplifications based on demanded elements.
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
  // shuf (shuf0 X, Y, Mask0), undef, Mask
  auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!Shuf0 || !Shuf->getOperand(1).isUndef())
    return SDValue();

  ArrayRef<int> Mask = Shuf->getMask();
  ArrayRef<int> Mask0 = Shuf0->getMask();
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    // Ignore undef elements.
    if (Mask[i] == -1)
      continue;
    assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");

    // Is the element of the shuffle operand chosen by this shuffle the same as
    // the element chosen by the shuffle operand itself?
    if (Mask0[Mask[i]] != Mask0[i])
      return SDValue();
  }
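  // E.g. if Mask0 is <0,0,2,2>, an outer Mask of <1,0,3,2> only swaps lanes
  // that already hold identical elements, so the outer shuffle is redundant.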
  // Every element of this shuffle is identical to the result of the previous
  // shuffle, so we can replace this value.
  return Shuf->getOperand(0);
}

SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splatted value can always be folded.
  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
    return V;

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    int SplatIndex = SVN->getSplatIndex();
    if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
        TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
      // splat (vector_bo L, R), Index -->
      // splat (scalar_bo (extelt L, Index), (extelt R, Index))
      SDValue L = N0.getOperand(0), R = N0.getOperand(1);
      SDLoc DL(N);
      EVT EltVT = VT.getScalarType();
      SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
      SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
      SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
      SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
                                  N0.getNode()->getFlags());
      SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
    }

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it. Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
    SDNode *V = N0.getNode();
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      SDValue Splatted = V->getOperand(SplatIndex);
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // This is intentionally placed after demanded elements simplification because
  // it could eliminate knowledge of undef elements created by this shuffle.
  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
    return ShufOp;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };
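    // E.g. scaling mask <1,u> by 2 expands each element into two adjacent
    // lanes of the narrower type: <2,3,u,u>.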
    SDValue BC0 = peekThroughOneUseBitcasts(N0);
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that the next rule will
      // trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }
        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }
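    // E.g. shuffle(shuffle(A, B, <0,4,1,5>), C, <0,2,1,3>) only ever reads A
    // and B, and the composed mask is <0,1,4,5>, so it folds to
    // shuffle(A, B, <0,1,4,5>).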
    // Check if all indices in Mask are Undef. If so, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
    // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
    return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
  }

  if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
    return V;

  return SDValue();
}

SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
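  // E.g. scalar_to_vector (extract_vector_elt v4i32:V, 2) can become
  // shuffle V, undef, <2,u,u,u> when the types line up.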
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      // If we have an implicit truncate, truncate here as long as it's legal.
      if (VT.getScalarType() != InVal.getValueType() &&
          InVal.getValueType().isScalarInteger() &&
          isTypeLegal(VT.getScalarType())) {
        SDValue Val =
            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
      }
      if (VT.getScalarType() == InVecT.getScalarType() &&
          VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
        SDValue LegalShuffle =
            TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
                                        DAG.getUNDEF(InVecT), NewMask, DAG);
        if (LegalShuffle) {
          // If the initial vector is the correct size this shuffle is a
          // valid result.
          if (VT == InVecT)
            return LegalShuffle;
          // If not we must truncate the vector.
          if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
            MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
            SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
            EVT SubVT =
                EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
                                 VT.getVectorNumElements());
            return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
                               LegalShuffle, ZeroIdx);
          }
        }
      }
    }
  }
  return SDValue();
}

SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If inserting an UNDEF, just return the original vector.
  if (N1.isUndef())
    return N0;

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // Eliminate an intermediate insert into an undef vector:
  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
  // insert_subvector undef, X, N2
  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
                       N1.getOperand(1), N2);

  if (!isa<ConstantSDNode>(N2))
    return SDValue();

  uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
  // Push subvector bitcasts to the output, adjusting the index as we go.
  // insert_subvector(bitcast(v), bitcast(s), c1)
  // -> bitcast(insert_subvector(v, s, c2))
  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
      N1.getOpcode() == ISD::BITCAST) {
    SDValue N0Src = peekThroughBitcasts(N0);
    SDValue N1Src = peekThroughBitcasts(N1);
    EVT N0SrcSVT = N0Src.getValueType().getScalarType();
    EVT N1SrcSVT = N1Src.getValueType().getScalarType();
    if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
        N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
      EVT NewVT;
      SDLoc DL(N);
      SDValue NewIdx;
      MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      LLVMContext &Ctx = *DAG.getContext();
      unsigned NumElts = VT.getVectorNumElements();
      unsigned EltSizeInBits = VT.getScalarSizeInBits();
      if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
        unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
        NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
        NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
      } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
        unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
        if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
          NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
          NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
        }
      }
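      // E.g. with VT = v8i32 and an N1 source of i16 elements, Scale is
      // 32 / 16 = 2, so the insert is rebuilt as a v16i16 insert at
      // InsIdx * 2 and then bitcast back to v8i32.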
      if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
        SDValue Res = DAG.getBitcast(NewVT, N0Src);
        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
  // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
  // when Idx1 < Idx0, so the insert with the larger index ends up outermost.
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType() &&
      isa<ConstantSDNode>(N0.getOperand(2))) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }
  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
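  // E.g. if each piece is v2i64, inserting X at index 4 into
  // (concat A, B, C, D) replaces the third piece: concat A, B, X, D.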
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();
    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  // Simplify source operands based on insertion.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
  if (N0->getOpcode() == ISD::FP16_TO_FP)
    return N0->getOperand(0);

  return SDValue();
}

SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
  if (N0->getOpcode() == ISD::AND) {
    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
                         N0.getOperand(0));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N0.getValueType();
  unsigned Opcode = N->getOpcode();

  // VECREDUCE over 1-element vector is just an extract.
  if (VT.getVectorNumElements() == 1) {
    SDLoc dl(N);
    SDValue Res = DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
        DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
    if (Res.getValueType() != N->getValueType(0))
      Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
    return Res;
  }

  // On a boolean vector an and/or reduction is the same as a umin/umax
  // reduction, since the elements are all 0 or -1 and -1 is the unsigned
  // maximum. Convert them if the latter is legal while the former isn't.
  if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
    unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
        ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
    if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
        TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
        DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
      return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
  }
  return SDValue();
}

/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
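  // E.g. on little-endian, AND v2i64 V, <0x00000000FFFFFFFF,
  // 0xFFFFFFFF00000000> has no per-i64 clear mask, but at Split == 2 every
  // i32 subelement is all-ones or all-zero, giving
  // shuffle (v4i32 bitcast V), zero, <0,5,6,3>.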
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits.lshrInPlace(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(0, DL, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}

/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // TODO: Remove/replace the extract cost check? If the elements are available
  //       as scalars, then there may be no extract cost. Should we ask if
  //       inserting a scalar back into a vector is cheap instead?
  int Index0, Index1;
  SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
  SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
  if (!Src0 || !Src1 || Index0 != Index1 ||
      Src0.getValueType().getVectorElementType() != EltVT ||
      Src1.getValueType().getVectorElementType() != EltVT ||
      !TLI.isExtractVecEltCheap(VT, Index0) ||
      !TLI.isOperationLegalOrCustom(Opcode, EltVT))
    return SDValue();

  SDLoc DL(N);
  SDValue IndexC =
      DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
  SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
  SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
  SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());

  // If all lanes but 1 are undefined, no need to splat the scalar result.
  // TODO: Keep track of undefs and use that info in the general case.
  if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
      count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
      count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
    // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
    // build_vec ..undef, (bo X, Y), undef...
    SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
    Ops[Index0] = ScalarBO;
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
  SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
  return DAG.getBuildVector(VT, DL, Ops);
}


/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Ops[] = {LHS, RHS};
  EVT VT = N->getValueType(0);
  unsigned Opcode = N->getOpcode();

  // See if we can constant fold the vector operation.
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
          Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
    return Fold;

  // Move unary shuffles with identical masks after a vector binop:
  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)
  //   --> shuffle (VBinOp A, B), Undef, Mask
  // This does not require type legality checks because we are creating the
  // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (eg, div-by-zero)
  // though. This code is adapted from the identical transform in instcombine.
  if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
      Opcode != ISD::UREM && Opcode != ISD::SREM &&
      Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
    auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
    auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
    if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
        LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
        (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
      SDLoc DL(N);
      SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
                                     RHS.getOperand(0), N->getFlags());
      SDValue UndefV = LHS.getOperand(1);
      return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
    }
  }
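
  // For example (illustrative): with the v4i32 mask <3,2,1,0>,
  //   add (shuffle A, undef, <3,2,1,0>), (shuffle B, undef, <3,2,1,0>)
  // --> shuffle (add A, B), undef, <3,2,1,0>
  // which replaces two shuffles with one.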

  // The following pattern is likely to emerge with vector reduction ops. Moving
  // the binary operation ahead of insertion may allow using a narrower vector
  // instruction that has better performance than the wide version of the op:
  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
      RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
      LHS.getOperand(2) == RHS.getOperand(2) &&
      (LHS.hasOneUse() || RHS.hasOneUse())) {
    SDValue X = LHS.getOperand(1);
    SDValue Y = RHS.getOperand(1);
    SDValue Z = LHS.getOperand(2);
    EVT NarrowVT = X.getValueType();
    if (NarrowVT == Y.getValueType() &&
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
      // (binop undef, undef) may not return undef, so compute that result.
      SDLoc DL(N);
      SDValue VecC =
          DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
      SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
    }
  }
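
  // For example (illustrative): with v2f64 subvectors X and Y inserted at the
  // same index Z into undef v4f64 vectors,
  //   fadd (insert_subvector undef, X, Z), (insert_subvector undef, Y, Z)
  // --> insert_subvector (fadd undef, undef), (fadd X, Y), Z
  // so the real work happens in a half-width fadd; the wide VecC operand
  // exists only because (binop undef, undef) may fold to a non-undef value.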

  // Make sure all but the first op are undef or constant.
  auto ConcatWithConstantOrUndef = [](SDValue Concat) {
    return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
           std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
                       [](const SDValue &Op) {
                         return Op.isUndef() ||
                                ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
                       });
  };

  // The following pattern is likely to emerge with vector reduction ops. Moving
  // the binary operation ahead of the concat may allow using a narrower vector
  // instruction that has better performance than the wide version of the op:
  // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
  //   concat (VBinOp X, Y), VecC
  if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
      (LHS.hasOneUse() || RHS.hasOneUse())) {
    EVT NarrowVT = LHS.getOperand(0).getValueType();
    if (NarrowVT == RHS.getOperand(0).getValueType() &&
        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
      SDLoc DL(N);
      unsigned NumOperands = LHS.getNumOperands();
      SmallVector<SDValue, 4> ConcatOps;
      for (unsigned i = 0; i != NumOperands; ++i) {
        // This will constant fold for operands 1 and up (undef or constant).
        ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
                                        RHS.getOperand(i)));
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    }
  }
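
  // For example (illustrative): with one-use operands,
  //   add (concat X, undef), (concat Y, undef)
  // --> concat (add X, Y), (add undef, undef)
  // where the second half constant folds away, leaving a single narrow add
  // feeding the concat.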

  if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
    return V;

  return SDValue();
}

SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
                                    SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      const SDNodeFlags Flags = N0.getNode()->getFlags();
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4), Flags);
      AddToWorklist(SETCC.getNode());
      SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
                                         SCC.getOperand(2), SCC.getOperand(3));
      SelectNode->setFlags(Flags);
      return SelectNode;
    }

    return SCC;
  }
  return SDValue();
}

/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select. Callers of this
/// should assume that TheSelect is deleted if this returns true. As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *Zero = nullptr;

      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }

      if (Zero && Zero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS &&
          (CC == ISD::SETOLT || CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }

  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        // Be conservative for atomics for the moment
        // TODO: This does appear to be legal for unordered atomics (see D66309)
        !LLD->isSimple() || !RLD->isSimple() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information. This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations. Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        // We can't produce a CMOV of a TargetFrameIndex since we won't
        // generate the address generation required.
        LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // The loads must not depend on one another.
    if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
      return false;

    // Check that the select condition doesn't reach either load. If so,
    // folding this will induce a cycle into the DAG. If not, this is safe to
    // xform, so create a select of the addresses.
    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;

    // Always fail if LLD and RLD are not independent. TheSelect is a
    // predecessor to all Nodes in question so we need not search past it.
    Visited.insert(TheSelect);
    Worklist.push_back(LLD);
    Worklist.push_back(RLD);

    if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
        SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
      return false;

    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondNode}. As we've already compared the
      // Loads, we only need to check if CondNode is a successor to one of the
      // loads. We can further avoid this if there's no use of their chain
      // value.
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      Worklist.push_back(CondNode);

      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      // We cannot do this optimization if any pair of {RLD, LLD} is a
      // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
      // the Loads, we only need to check if CondLHS/CondRHS is a successor to
      // one of the loads. We can further avoid this if there's no use of their
      // chain value.
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
      Worklist.push_back(CondLHS);
      Worklist.push_back(CondRHS);

      if ((LLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
          (RLD->hasAnyUseOfValue(1) &&
           SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
    if (!RLD->isInvariant())
      MMOFlags &= ~MachineMemOperand::MOInvariant;
    if (!RLD->isDereferenceable())
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
                         MMOFlags);
    } else {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getExtLoad(
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
                                                  : LLD->getExtensionType(),
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain. We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}

/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
/// bitwise 'and'.
SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
                                            SDValue N1, SDValue N2, SDValue N3,
                                            ISD::CondCode CC) {
  // If this is a select where the false operand is zero and the compare is a
  // check of the sign bit, see if we can perform the "gzip trick":
  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
  EVT XType = N0.getValueType();
  EVT AType = N2.getValueType();
  if (!isNullConstant(N3) || !XType.bitsGE(AType))
    return SDValue();

  // If the comparison is testing for a positive value, we have to invert
  // the sign bit mask, so only do that transform if the target has a bitwise
  // 'and not' instruction (the invert is free).
  if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
    // (X > -1) ? A : 0
    // (X > 0) ? X : 0 <-- This is canonical signed max.
    if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
      return SDValue();
  } else if (CC == ISD::SETLT) {
    // (X < 0) ? A : 0
    // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
    if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
      return SDValue();
  } else {
    return SDValue();
  }

  // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
  // constant.
  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
    unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
    SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
    AddToWorklist(Shift.getNode());

    if (XType.bitsGT(AType)) {
      Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
      AddToWorklist(Shift.getNode());
    }

    if (CC == ISD::SETGT)
      Shift = DAG.getNOT(DL, Shift, AType);

    return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
  }

  SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
  AddToWorklist(Shift.getNode());

  if (XType.bitsGT(AType)) {
    Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
    AddToWorklist(Shift.getNode());
  }

  if (CC == ISD::SETGT)
    Shift = DAG.getNOT(DL, Shift, AType);

  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
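
// Worked example for the "gzip trick" above (illustrative): for i32 X with the
// single-bit constant A = 8,
//   select_cc setlt X, 0, 8, 0 --> and (srl X, 28), 8
// since 28 = 32 - log2(8) - 1 moves the sign bit directly into bit 3. In the
// general (non-single-bit) case the sign bit is smeared instead:
//   select_cc setlt X, 0, A, 0 --> and (sra X, 31), A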

/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
/// in it. This may be a win when the constant is not otherwise available
/// because it replaces two constant pool loads with one.
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
    const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
    ISD::CondCode CC) {
  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
    return SDValue();

  // If we are before legalize types, we want the other legalization to happen
  // first (for example, to avoid messing with soft float).
  auto *TV = dyn_cast<ConstantFPSDNode>(N2);
  auto *FV = dyn_cast<ConstantFPSDNode>(N3);
  EVT VT = N2.getValueType();
  if (!TV || !FV || !TLI.isTypeLegal(VT))
    return SDValue();

  // If a constant can be materialized without loads, this does not make sense.
  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
      TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
      TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
    return SDValue();

  // If both constants have multiple uses, then we won't need to do an extra
  // load. The values are likely around in registers for other users.
  if (!TV->hasOneUse() && !FV->hasOneUse())
    return SDValue();

  Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
                       const_cast<ConstantFP*>(TV->getConstantFPValue()) };
  Type *FPTy = Elts[0]->getType();
  const DataLayout &TD = DAG.getDataLayout();

  // Create a ConstantArray of the two constants.
  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
  SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
                                      TD.getPrefTypeAlignment(FPTy));
  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

  // Get offsets to the 0 and 1 elements of the array, so we can select between
  // them.
  SDValue Zero = DAG.getIntPtrConstant(0, DL);
  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
  SDValue Cond =
      DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
  AddToWorklist(Cond.getNode());
  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
  AddToWorklist(CstOffset.getNode());
  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
  AddToWorklist(CPIdx.getNode());
  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                     MachinePointerInfo::getConstantPool(
                         DAG.getMachineFunction()), Alignment);
}
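
// Worked example (illustrative): for "(a < b) ? 1.0f : 2.0f", the code above
// emits the pool array {2.0f, 1.0f} (false value first) and computes
//   load (tmp + ((a < b) ? 4 : 0))
// where tmp is the constant pool address and 4 is the f32 element size, so a
// true condition selects element 1, the 'true' constant.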

/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT CmpOpVT = N0.getValueType();
  EVT CmpResVT = getSetCCResultType(CmpOpVT);
  EVT VT = N2.getValueType();
  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant.
  if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
    AddToWorklist(SCC.getNode());
    if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
      // fold select_cc true, x, y -> x
      // fold select_cc false, x, y -> y
      return !(SCCC->isNullValue()) ? N2 : N3;
    }
  }

  if (SDValue V =
          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
    return V;

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be: we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register. Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
          DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                          getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is
      // either all-ones, or zero.
      SDValue ShrAmt =
          DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                          getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }
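
  // Worked example for the single bit-test fold above (illustrative): for i32
  // x and y = 4,
  //   select_cc seteq (and x, 4), 0, 0, A
  // --> and (sra (shl x, 29), 31), A
  // The shl moves bit 2 into the sign bit, and the sra smears it into either
  // an all-ones or an all-zeros mask for the final and.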

  // fold select C, 16, 0 -> shl C, 4
  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();

  if ((Fold || Swap) &&
      TLI.getBooleanContents(CmpOpVT) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
    if (Swap) {
      CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
      std::swap(N2C, N3C);
    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    SDValue Temp, SCC;
    // zext (setcc n0, n1)
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
      if (VT.bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    } else {
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    }

    AddToWorklist(SCC.getNode());
    AddToWorklist(Temp.getNode());

    if (N2C->isOne())
      return Temp;

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
                                       SDLoc(Temp),
                                       getShiftAmountTy(Temp.getValueType())));
  }
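
  // For example (illustrative), with zero-or-one booleans:
  //   select_cc setlt a, b, 16, 0 --> shl (zext (setcc a, b, setlt)), 4
  // and when the true value is 1 the zext of the setcc is returned directly.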

  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is NE instead of EQ, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}

/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &DL,
                                   bool foldBooleans) {
  TargetLowering::DAGCombinerInfo
      DagCombineInfo(DAG, Level, false, this);
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}

/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}

/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
/// DAG expression that will generate the same value by right shifting.
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}

/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}

/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
  EVT VT = V.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();
  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
  return LogBase2;
}
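
// For example (illustrative): for i32 V = 16, ctlz(16) = 27 and
// LogBase2 = 31 - 27 = 4; more generally this computes floor(log2(V)) for any
// non-null V, which is exact whenever V is a power of 2.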

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = A X - 1 [which has a zero at X = 1/A]
///     =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///     does not require additional intermediate precision]
/// For the last iteration, put numerator N into it to gain more precision:
///   Result = N X_i + X_i (N - N A X_i)
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
                                      SDNodeFlags Flags) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    SDLoc DL(Op);
    if (Iterations) {
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (N - Arg * Est)
      // If this is the last iteration, also multiply by the numerator.
      for (int i = 0; i < Iterations; ++i) {
        SDValue MulEst = Est;

        if (i == Iterations - 1) {
          MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
          AddToWorklist(MulEst.getNode());
        }

        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT,
                             (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    } else {
      // If no iterations are available, multiply with N.
      Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
      AddToWorklist(Est.getNode());
    }

    return Est;
  }

  return SDValue();
}
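
// Sanity check of the final-iteration algebra above (illustrative): with
// MulEst = N * X_i, the last loop trip computes
//   Est = N * X_i + X_i * (N - A * (N * X_i))
//       = N * (X_i + X_i * (1 - A * X_i))
// which is exactly N times the plain Newton update, matching the header
// comment's claim about folding the numerator in for extra precision.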

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
  }

  // If non-reciprocal square root is requested, multiply the result by Arg.
  if (!Reciprocal)
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);

  return Est;
}
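
// Sanity check (illustrative): for A = 4.0 and the exact value Est = 0.5,
// HalfArg = 1.5 * 4.0 - 4.0 = 2.0, and one iteration yields
//   Est = 0.5 * (1.5 - 2.0 * 0.5 * 0.5) = 0.5 * 1.0 = 0.5
// so the exact reciprocal square root is a fixed point of the iteration, as
// expected for a converged Newton step.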

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false).
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  //   E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);

    // When calculating a square root at the last iteration build:
    //   S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
  }

  return Est;
}
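
// Sanity check (illustrative): for A = 4.0 and the exact value E = 0.5,
//   AE = 2.0, AEE = 1.0, RHS = 1.0 - 3.0 = -2.0, LHS = 0.5 * -0.5 = -0.25,
// giving E' = -0.25 * -2.0 = 0.5, a fixed point. On the last iteration of a
// non-reciprocal sqrt, LHS = (A * E) * -0.5 = -1.0 instead, and
// E' = -1.0 * -2.0 = 2.0 = sqrt(4.0).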

/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  bool UseOneConstNR = false;
  if (SDValue Est =
          TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                              Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
                ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
                : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        const Function &F = DAG.getMachineFunction().getFunction();
        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
        if (Denorms.getValueAsString().equals("ieee")) {
          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
        } else {
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
        }
      }
    }

    return Est;
  }

  return SDValue();
}

SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, true);
}

SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, false);
}

/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
  struct MemUseCharacteristics {
    bool IsVolatile;
    bool IsAtomic;
    SDValue BasePtr;
    int64_t Offset;
    Optional<int64_t> NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
    if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
      int64_t Offset = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
        Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
                     ? C->getSExtValue()
                     : (LSN->getAddressingMode() == ISD::PRE_DEC)
                           ? -1 * C->getSExtValue()
                           : 0;
      return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
              Offset /*base offset*/,
              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
              LSN->getMemOperand()};
    }
    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
      return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
              (LN->hasOffset()) ? LN->getOffset() : 0,
              (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
                                : Optional<int64_t>(),
              (MachineMemOperand *)nullptr};
    // Default.
    return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
            (int64_t)0 /*offset*/,
            Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
  };

  MemUseCharacteristics MUC0 = getCharacteristics(Op0),
                        MUC1 = getCharacteristics(Op1);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  // Be conservative about atomics for the moment
  // TODO: This is way overconservative for unordered atomics (see D66309)
  if (MUC0.IsAtomic && MUC1.IsAtomic)
    return true;

  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  // Try to prove that there is aliasing, or that there is no aliasing. Either
  // way, we can return now. If nothing can be proved, proceed with more tests.
  bool IsAlias;
  if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
                                       DAG, IsAlias))
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
  // either are not known.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load/store.
  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
      (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
  unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
      *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
        (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
      return false;
  }
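
  // For example (illustrative): two 4-byte accesses of the same IR value with
  // base alignment 8 at source offsets 0 and 4 give OffAlign0 = 0 and
  // OffAlign1 = 4, so 0 + 4 <= 4 proves they are disjoint and we return
  // no-alias without ever querying AA.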

  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
    AliasResult AAResult = AA->alias(
        MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                       UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
        MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                       UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  // TODO: relax aliasing for unordered atomics (see D66309)
  const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Attempt to improve chain by a single step
  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
    switch (C.getOpcode()) {
    case ISD::EntryToken:
      // No need to mark EntryToken.
      C = SDValue();
      return true;
    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for C.
      // TODO: Relax aliasing for unordered atomics (see D66309)
      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
                      cast<LSBaseSDNode>(C.getNode())->isSimple();
      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      // Alias, so stop here.
      return false;
    }

    case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
      C = C.getOperand(0);
      return true;

    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END: {
      // We can forward past any lifetime start/end that can be proven not to
      // alias the memory access.
      if (!isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      return false;
    }
    default:
      return false;
    }
  };

  // Look at each chain and determine if it is an alias. If so, add it to the
  // aliases list. If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // Don't bother if we've seen Chain before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up. Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases
      // the likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        continue;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      continue;
    }

    // Everything else
    if (ImproveChain(Chain)) {
      // Updated Chain Found, Consider new chain if one exists.
      if (Chain.getNode())
        Chains.push_back(Chain);
      ++Depth;
      continue;
    }
    // No Improved Chain Possible, treat as Alias.
    Aliases.push_back(Chain);
  }
}

/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  if (OptLevel == CodeGenOpt::None)
    return OldChain;

  // Ops for replacing token factor.
  SmallVector<SDValue, 8> Aliases;

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it. We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getTokenFactor(SDLoc(N), Aliases);
}

namespace {

// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;

bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }

} // namespace

// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the address immediately after
  // the previous one, and is thus merged with the previous interval at
  // insertion time.
  using IMap =
      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Add ST's interval.
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    // TODO: Relax for unordered atomics (see D66309)
    if (!Chain->isSimple() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, Unit);

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.size() == 0)
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return a single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());
  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChains.
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
  CombineTo(St, TF);

  // Add TF and its operands to the worklist.
  AddToWorklist(TF.getNode());
  for (const SDValue &Op : TF->ops())
    AddToWorklist(Op.getNode());
  AddToWorklist(STChain);
  return true;
}
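
// For example (illustrative): given a chain of adjacent, non-overlapping i32
// stores to a+0, a+4, a+8, and a+12, the covering intervals never collide, so
// each store in the chain gets an improved chain and all of them are joined
// under a single TokenFactor, letting later combines such as
// MergeConsecutiveStores see them as independent candidates at once.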

bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Directly improve a chain of disjoint stores starting at St.
  if (parallelizeChainedStores(St))
    return true;

  // Improve St's chain.
  SDValue BetterChain = FindBetterChain(St, St->getChain());
  if (St->getChain() != BetterChain) {
    replaceStoreChain(St, BetterChain);
    return true;
  }
  return false;
}

/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                           CodeGenOpt::Level OptLevel) {
  /// This is the main entry point to this class.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}