TargetLowering.cpp 278 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127
  1. //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This implements the TargetLowering class.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/CodeGen/TargetLowering.h"
  13. #include "llvm/ADT/STLExtras.h"
  14. #include "llvm/CodeGen/CallingConvLower.h"
  15. #include "llvm/CodeGen/MachineFrameInfo.h"
  16. #include "llvm/CodeGen/MachineFunction.h"
  17. #include "llvm/CodeGen/MachineJumpTableInfo.h"
  18. #include "llvm/CodeGen/MachineRegisterInfo.h"
  19. #include "llvm/CodeGen/SelectionDAG.h"
  20. #include "llvm/CodeGen/TargetRegisterInfo.h"
  21. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  22. #include "llvm/IR/DataLayout.h"
  23. #include "llvm/IR/DerivedTypes.h"
  24. #include "llvm/IR/GlobalVariable.h"
  25. #include "llvm/IR/LLVMContext.h"
  26. #include "llvm/MC/MCAsmInfo.h"
  27. #include "llvm/MC/MCExpr.h"
  28. #include "llvm/Support/ErrorHandling.h"
  29. #include "llvm/Support/KnownBits.h"
  30. #include "llvm/Support/MathExtras.h"
  31. #include "llvm/Target/TargetLoweringObjectFile.h"
  32. #include "llvm/Target/TargetMachine.h"
  33. #include <cctype>
  34. using namespace llvm;
  35. /// NOTE: The TargetMachine owns TLOF.
  36. TargetLowering::TargetLowering(const TargetMachine &tm)
  37. : TargetLoweringBase(tm) {}
  38. const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  39. return nullptr;
  40. }
  41. bool TargetLowering::isPositionIndependent() const {
  42. return getTargetMachine().isPositionIndependent();
  43. }
  44. /// Check whether a given call node is in tail position within its function. If
  45. /// so, it sets Chain to the input chain of the tail call.
  46. bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
  47. SDValue &Chain) const {
  48. const Function &F = DAG.getMachineFunction().getFunction();
  49. // Conservatively require the attributes of the call to match those of
  50. // the return. Ignore NoAlias and NonNull because they don't affect the
  51. // call sequence.
  52. AttributeList CallerAttrs = F.getAttributes();
  53. if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
  54. .removeAttribute(Attribute::NoAlias)
  55. .removeAttribute(Attribute::NonNull)
  56. .hasAttributes())
  57. return false;
  58. // It's not safe to eliminate the sign / zero extension of the return value.
  59. if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
  60. CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
  61. return false;
  62. // Check if the only use is a function return node.
  63. return isUsedByReturnOnly(Node, Chain);
  64. }
  65. bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
  66. const uint32_t *CallerPreservedMask,
  67. const SmallVectorImpl<CCValAssign> &ArgLocs,
  68. const SmallVectorImpl<SDValue> &OutVals) const {
  69. for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
  70. const CCValAssign &ArgLoc = ArgLocs[I];
  71. if (!ArgLoc.isRegLoc())
  72. continue;
  73. Register Reg = ArgLoc.getLocReg();
  74. // Only look at callee saved registers.
  75. if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
  76. continue;
  77. // Check that we pass the value used for the caller.
  78. // (We look for a CopyFromReg reading a virtual register that is used
  79. // for the function live-in value of register Reg)
  80. SDValue Value = OutVals[I];
  81. if (Value->getOpcode() != ISD::CopyFromReg)
  82. return false;
  83. unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
  84. if (MRI.getLiveInPhysReg(ArgReg) != Reg)
  85. return false;
  86. }
  87. return true;
  88. }
  89. /// Set CallLoweringInfo attribute flags based on a call instruction
  90. /// and called function attributes.
  91. void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
  92. unsigned ArgIdx) {
  93. IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  94. IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  95. IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  96. IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  97. IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  98. IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  99. IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  100. IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  101. IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  102. IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  103. Alignment = Call->getParamAlignment(ArgIdx);
  104. ByValType = nullptr;
  105. if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
  106. ByValType = Call->getParamByValType(ArgIdx);
  107. }
  108. /// Generate a libcall taking the given operands as arguments and returning a
  109. /// result of type RetVT.
  110. std::pair<SDValue, SDValue>
  111. TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
  112. ArrayRef<SDValue> Ops,
  113. MakeLibCallOptions CallOptions,
  114. const SDLoc &dl) const {
  115. TargetLowering::ArgListTy Args;
  116. Args.reserve(Ops.size());
  117. TargetLowering::ArgListEntry Entry;
  118. for (unsigned i = 0; i < Ops.size(); ++i) {
  119. SDValue NewOp = Ops[i];
  120. Entry.Node = NewOp;
  121. Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
  122. Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
  123. CallOptions.IsSExt);
  124. Entry.IsZExt = !Entry.IsSExt;
  125. if (CallOptions.IsSoften &&
  126. !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
  127. Entry.IsSExt = Entry.IsZExt = false;
  128. }
  129. Args.push_back(Entry);
  130. }
  131. if (LC == RTLIB::UNKNOWN_LIBCALL)
  132. report_fatal_error("Unsupported library call operation!");
  133. SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
  134. getPointerTy(DAG.getDataLayout()));
  135. Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  136. TargetLowering::CallLoweringInfo CLI(DAG);
  137. bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  138. bool zeroExtend = !signExtend;
  139. if (CallOptions.IsSoften &&
  140. !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
  141. signExtend = zeroExtend = false;
  142. }
  143. CLI.setDebugLoc(dl)
  144. .setChain(DAG.getEntryNode())
  145. .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
  146. .setNoReturn(CallOptions.DoesNotReturn)
  147. .setDiscardResult(!CallOptions.IsReturnValueUsed)
  148. .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
  149. .setSExtResult(signExtend)
  150. .setZExtResult(zeroExtend);
  151. return LowerCallTo(CLI);
  152. }
/// Determine the sequence of memory-operation value types to emit for a
/// memcpy/memmove/memset of 'Size' bytes, appending them to 'MemOps'.
/// Returns false if more than 'Limit' operations would be needed or if the
/// alignment combination is unsupported.
bool
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                         unsigned Limit, uint64_t Size,
                                         unsigned DstAlign, unsigned SrcAlign,
                                         bool IsMemset,
                                         bool ZeroMemset,
                                         bool MemcpyStrSrc,
                                         bool AllowOverlap,
                                         unsigned DstAS, unsigned SrcAS,
                                         const AttributeList &FuncAttributes) const {
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
    return false;

  // Let the target pick a preferred type first.
  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
                               IsMemset, ZeroMemset, MemcpyStrSrc,
                               FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  // Emit operations, shrinking the type as the remaining size demands.
  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Walk down the simple-type ladder until a safe type is found; i8 is
        // the unconditional floor.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
                                         MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
  243. /// Soften the operands of a comparison. This code is shared among BR_CC,
  244. /// SELECT_CC, and SETCC handlers.
  245. void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
  246. SDValue &NewLHS, SDValue &NewRHS,
  247. ISD::CondCode &CCCode,
  248. const SDLoc &dl, const SDValue OldLHS,
  249. const SDValue OldRHS) const {
  250. assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
  251. && "Unsupported setcc type!");
  252. // Expand into one or more soft-fp libcall(s).
  253. RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  254. bool ShouldInvertCC = false;
  255. switch (CCCode) {
  256. case ISD::SETEQ:
  257. case ISD::SETOEQ:
  258. LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
  259. (VT == MVT::f64) ? RTLIB::OEQ_F64 :
  260. (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
  261. break;
  262. case ISD::SETNE:
  263. case ISD::SETUNE:
  264. LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
  265. (VT == MVT::f64) ? RTLIB::UNE_F64 :
  266. (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
  267. break;
  268. case ISD::SETGE:
  269. case ISD::SETOGE:
  270. LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
  271. (VT == MVT::f64) ? RTLIB::OGE_F64 :
  272. (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
  273. break;
  274. case ISD::SETLT:
  275. case ISD::SETOLT:
  276. LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
  277. (VT == MVT::f64) ? RTLIB::OLT_F64 :
  278. (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
  279. break;
  280. case ISD::SETLE:
  281. case ISD::SETOLE:
  282. LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
  283. (VT == MVT::f64) ? RTLIB::OLE_F64 :
  284. (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
  285. break;
  286. case ISD::SETGT:
  287. case ISD::SETOGT:
  288. LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
  289. (VT == MVT::f64) ? RTLIB::OGT_F64 :
  290. (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
  291. break;
  292. case ISD::SETUO:
  293. LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
  294. (VT == MVT::f64) ? RTLIB::UO_F64 :
  295. (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
  296. break;
  297. case ISD::SETO:
  298. LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
  299. (VT == MVT::f64) ? RTLIB::O_F64 :
  300. (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
  301. break;
  302. case ISD::SETONE:
  303. // SETONE = SETOLT | SETOGT
  304. LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
  305. (VT == MVT::f64) ? RTLIB::OLT_F64 :
  306. (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
  307. LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
  308. (VT == MVT::f64) ? RTLIB::OGT_F64 :
  309. (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
  310. break;
  311. case ISD::SETUEQ:
  312. LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
  313. (VT == MVT::f64) ? RTLIB::UO_F64 :
  314. (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
  315. LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
  316. (VT == MVT::f64) ? RTLIB::OEQ_F64 :
  317. (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
  318. break;
  319. default:
  320. // Invert CC for unordered comparisons
  321. ShouldInvertCC = true;
  322. switch (CCCode) {
  323. case ISD::SETULT:
  324. LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
  325. (VT == MVT::f64) ? RTLIB::OGE_F64 :
  326. (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
  327. break;
  328. case ISD::SETULE:
  329. LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
  330. (VT == MVT::f64) ? RTLIB::OGT_F64 :
  331. (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
  332. break;
  333. case ISD::SETUGT:
  334. LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
  335. (VT == MVT::f64) ? RTLIB::OLE_F64 :
  336. (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
  337. break;
  338. case ISD::SETUGE:
  339. LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
  340. (VT == MVT::f64) ? RTLIB::OLT_F64 :
  341. (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
  342. break;
  343. default: llvm_unreachable("Do not know how to soften this setcc!");
  344. }
  345. }
  346. // Use the target specific return value for comparions lib calls.
  347. EVT RetVT = getCmpLibcallReturnType();
  348. SDValue Ops[2] = {NewLHS, NewRHS};
  349. TargetLowering::MakeLibCallOptions CallOptions;
  350. EVT OpsVT[2] = { OldLHS.getValueType(),
  351. OldRHS.getValueType() };
  352. CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  353. NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
  354. NewRHS = DAG.getConstant(0, dl, RetVT);
  355. CCCode = getCmpLibcallCC(LC1);
  356. if (ShouldInvertCC)
  357. CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
  358. if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
  359. SDValue Tmp = DAG.getNode(
  360. ISD::SETCC, dl,
  361. getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
  362. NewLHS, NewRHS, DAG.getCondCode(CCCode));
  363. NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
  364. NewLHS = DAG.getNode(
  365. ISD::SETCC, dl,
  366. getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
  367. NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
  368. NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
  369. NewRHS = SDValue();
  370. }
  371. }
  372. /// Return the entry encoding for a jump table in the current function. The
  373. /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
  374. unsigned TargetLowering::getJumpTableEncoding() const {
  375. // In non-pic modes, just use the address of a block.
  376. if (!isPositionIndependent())
  377. return MachineJumpTableInfo::EK_BlockAddress;
  378. // In PIC mode, if the target supports a GPRel32 directive, use it.
  379. if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
  380. return MachineJumpTableInfo::EK_GPRel32BlockAddress;
  381. // Otherwise, use a label difference.
  382. return MachineJumpTableInfo::EK_LabelDifference32;
  383. }
  384. SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
  385. SelectionDAG &DAG) const {
  386. // If our PIC model is GP relative, use the global offset table as the base.
  387. unsigned JTEncoding = getJumpTableEncoding();
  388. if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
  389. (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
  390. return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
  391. return Table;
  392. }
  393. /// This returns the relocation base for the given PIC jumptable, the same as
  394. /// getPICJumpTableRelocBase, but as an MCExpr.
  395. const MCExpr *
  396. TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
  397. unsigned JTI,MCContext &Ctx) const{
  398. // The normal PIC reloc base is the label at the start of the jump table.
  399. return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
  400. }
  401. bool
  402. TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  403. const TargetMachine &TM = getTargetMachine();
  404. const GlobalValue *GV = GA->getGlobal();
  405. // If the address is not even local to this DSO we will have to load it from
  406. // a got and then add the offset.
  407. if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
  408. return false;
  409. // If the code is position independent we will have to add a base register.
  410. if (isPositionIndependent())
  411. return false;
  412. // Otherwise we can do it.
  413. return true;
  414. }
  415. //===----------------------------------------------------------------------===//
  416. // Optimization Methods
  417. //===----------------------------------------------------------------------===//
  418. /// If the specified instruction has a constant integer operand and there are
  419. /// bits set in that constant that are not demanded, then clear those bits and
  420. /// return true.
  421. bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
  422. TargetLoweringOpt &TLO) const {
  423. SDLoc DL(Op);
  424. unsigned Opcode = Op.getOpcode();
  425. // Do target-specific constant optimization.
  426. if (targetShrinkDemandedConstant(Op, Demanded, TLO))
  427. return TLO.New.getNode();
  428. // FIXME: ISD::SELECT, ISD::SELECT_CC
  429. switch (Opcode) {
  430. default:
  431. break;
  432. case ISD::XOR:
  433. case ISD::AND:
  434. case ISD::OR: {
  435. auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  436. if (!Op1C)
  437. return false;
  438. // If this is a 'not' op, don't touch it because that's a canonical form.
  439. const APInt &C = Op1C->getAPIntValue();
  440. if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
  441. return false;
  442. if (!C.isSubsetOf(Demanded)) {
  443. EVT VT = Op.getValueType();
  444. SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
  445. SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
  446. return TLO.CombineTo(Op, NewOp);
  447. }
  448. break;
  449. }
  450. }
  451. return false;
  452. }
  453. /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
  454. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
  455. /// generalized for targets with other types of implicit widening casts.
  456. bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
  457. const APInt &Demanded,
  458. TargetLoweringOpt &TLO) const {
  459. assert(Op.getNumOperands() == 2 &&
  460. "ShrinkDemandedOp only supports binary operators!");
  461. assert(Op.getNode()->getNumValues() == 1 &&
  462. "ShrinkDemandedOp only supports nodes with one result!");
  463. SelectionDAG &DAG = TLO.DAG;
  464. SDLoc dl(Op);
  465. // Early return, as this function cannot handle vector types.
  466. if (Op.getValueType().isVector())
  467. return false;
  468. // Don't do this if the node has another user, which may require the
  469. // full value.
  470. if (!Op.getNode()->hasOneUse())
  471. return false;
  472. // Search for the smallest integer type with free casts to and from
  473. // Op's type. For expedience, just check power-of-2 integer types.
  474. const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  475. unsigned DemandedSize = Demanded.getActiveBits();
  476. unsigned SmallVTBits = DemandedSize;
  477. if (!isPowerOf2_32(SmallVTBits))
  478. SmallVTBits = NextPowerOf2(SmallVTBits);
  479. for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
  480. EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
  481. if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
  482. TLI.isZExtFree(SmallVT, Op.getValueType())) {
  483. // We found a type with free casts.
  484. SDValue X = DAG.getNode(
  485. Op.getOpcode(), dl, SmallVT,
  486. DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
  487. DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
  488. assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
  489. SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
  490. return TLO.CombineTo(Op, Z);
  491. }
  492. }
  493. return false;
  494. }
  495. bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
  496. DAGCombinerInfo &DCI) const {
  497. SelectionDAG &DAG = DCI.DAG;
  498. TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
  499. !DCI.isBeforeLegalizeOps());
  500. KnownBits Known;
  501. bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  502. if (Simplified) {
  503. DCI.AddToWorklist(Op.getNode());
  504. DCI.CommitTargetLoweringOpt(TLO);
  505. }
  506. return Simplified;
  507. }
  508. bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
  509. KnownBits &Known,
  510. TargetLoweringOpt &TLO,
  511. unsigned Depth,
  512. bool AssumeSingleUse) const {
  513. EVT VT = Op.getValueType();
  514. APInt DemandedElts = VT.isVector()
  515. ? APInt::getAllOnesValue(VT.getVectorNumElements())
  516. : APInt(1, 1);
  517. return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
  518. AssumeSingleUse);
  519. }
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
/// Given that only DemandedBits/DemandedElts of Op are used, try to return an
/// existing SDValue that produces those same demanded bits (e.g. one of Op's
/// operands), without creating new nodes. Returns an empty SDValue on failure.
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Limit search depth.
  if (Depth >= 6)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(Op.getValueType());

  unsigned NumElts = DemandedElts.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();

    // Same element size: the demanded bits/elts map straight through.
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    // TODO - bigendian once we have test coverage.
    // Wide dst elements built from several narrow src elements: translate the
    // demanded dst bits into the corresponding src bit/element masks.
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    // Narrow dst elements packed inside wider src elements: each src element
    // covers 'Scale' dst elements.
    if ((NumSrcEltBits % NumDstEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }
    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
      return Op.getOperand(0);
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      // NOTE(review): M - NumElts mixes int and unsigned; for M < NumElts the
      // wrapped value cannot equal i in practice — confirm intent matches
      // "M == i + NumElts".
      IdentityRHS &= ((M - NumElts) == i);
    }

    // If all of the demanded elts are undef, any source works.
    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // Let targets simplify their own nodes.
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
  669. /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
  670. /// result of Op are ever used downstream. If we can use this information to
  671. /// simplify Op, create a new simplified DAG node and return true, returning the
  672. /// original and new nodes in Old and New. Otherwise, analyze the expression and
  673. /// return a mask of Known bits for the expression (used to simplify the
  674. /// caller). The Known bits may only be accurate for those bits in the
  675. /// OriginalDemandedBits and OriginalDemandedElts.
  676. bool TargetLowering::SimplifyDemandedBits(
  677. SDValue Op, const APInt &OriginalDemandedBits,
  678. const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
  679. unsigned Depth, bool AssumeSingleUse) const {
  680. unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  681. assert(Op.getScalarValueSizeInBits() == BitWidth &&
  682. "Mask size mismatches value type size!");
  683. unsigned NumElts = OriginalDemandedElts.getBitWidth();
  684. assert((!Op.getValueType().isVector() ||
  685. NumElts == Op.getValueType().getVectorNumElements()) &&
  686. "Unexpected vector size");
  687. APInt DemandedBits = OriginalDemandedBits;
  688. APInt DemandedElts = OriginalDemandedElts;
  689. SDLoc dl(Op);
  690. auto &DL = TLO.DAG.getDataLayout();
  691. // Don't know anything.
  692. Known = KnownBits(BitWidth);
  693. // Undef operand.
  694. if (Op.isUndef())
  695. return false;
  696. if (Op.getOpcode() == ISD::Constant) {
  697. // We know all of the bits for a constant!
  698. Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
  699. Known.Zero = ~Known.One;
  700. return false;
  701. }
  702. // Other users may use these bits.
  703. EVT VT = Op.getValueType();
  704. if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
  705. if (Depth != 0) {
  706. // If not at the root, Just compute the Known bits to
  707. // simplify things downstream.
  708. Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
  709. return false;
  710. }
  711. // If this is the root being simplified, allow it to have multiple uses,
  712. // just set the DemandedBits/Elts to all bits.
  713. DemandedBits = APInt::getAllOnesValue(BitWidth);
  714. DemandedElts = APInt::getAllOnesValue(NumElts);
  715. } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
  716. // Not demanding any bits/elts from Op.
  717. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  718. } else if (Depth >= 6) { // Limit search depth.
  719. return false;
  720. }
  721. KnownBits Known2, KnownOut;
  722. switch (Op.getOpcode()) {
  723. case ISD::SCALAR_TO_VECTOR: {
  724. if (!DemandedElts[0])
  725. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  726. KnownBits SrcKnown;
  727. SDValue Src = Op.getOperand(0);
  728. unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
  729. APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
  730. if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
  731. return true;
  732. Known = SrcKnown.zextOrTrunc(BitWidth, false);
  733. break;
  734. }
  735. case ISD::BUILD_VECTOR:
  736. // Collect the known bits that are shared by every demanded element.
  737. // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
  738. Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
  739. return false; // Don't fall through, will infinitely loop.
  740. case ISD::LOAD: {
  741. LoadSDNode *LD = cast<LoadSDNode>(Op);
  742. if (getTargetConstantFromLoad(LD)) {
  743. Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
  744. return false; // Don't fall through, will infinitely loop.
  745. }
  746. break;
  747. }
  748. case ISD::INSERT_VECTOR_ELT: {
  749. SDValue Vec = Op.getOperand(0);
  750. SDValue Scl = Op.getOperand(1);
  751. auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  752. EVT VecVT = Vec.getValueType();
  753. // If index isn't constant, assume we need all vector elements AND the
  754. // inserted element.
  755. APInt DemandedVecElts(DemandedElts);
  756. if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
  757. unsigned Idx = CIdx->getZExtValue();
  758. DemandedVecElts.clearBit(Idx);
  759. // Inserted element is not required.
  760. if (!DemandedElts[Idx])
  761. return TLO.CombineTo(Op, Vec);
  762. }
  763. KnownBits KnownScl;
  764. unsigned NumSclBits = Scl.getScalarValueSizeInBits();
  765. APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
  766. if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
  767. return true;
  768. Known = KnownScl.zextOrTrunc(BitWidth, false);
  769. KnownBits KnownVec;
  770. if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
  771. Depth + 1))
  772. return true;
  773. if (!!DemandedVecElts) {
  774. Known.One &= KnownVec.One;
  775. Known.Zero &= KnownVec.Zero;
  776. }
  777. return false;
  778. }
  779. case ISD::INSERT_SUBVECTOR: {
  780. SDValue Base = Op.getOperand(0);
  781. SDValue Sub = Op.getOperand(1);
  782. EVT SubVT = Sub.getValueType();
  783. unsigned NumSubElts = SubVT.getVectorNumElements();
  784. // If index isn't constant, assume we need the original demanded base
  785. // elements and ALL the inserted subvector elements.
  786. APInt BaseElts = DemandedElts;
  787. APInt SubElts = APInt::getAllOnesValue(NumSubElts);
  788. if (isa<ConstantSDNode>(Op.getOperand(2))) {
  789. const APInt &Idx = Op.getConstantOperandAPInt(2);
  790. if (Idx.ule(NumElts - NumSubElts)) {
  791. unsigned SubIdx = Idx.getZExtValue();
  792. SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
  793. BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
  794. }
  795. }
  796. KnownBits KnownSub, KnownBase;
  797. if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
  798. Depth + 1))
  799. return true;
  800. if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
  801. Depth + 1))
  802. return true;
  803. Known.Zero.setAllBits();
  804. Known.One.setAllBits();
  805. if (!!SubElts) {
  806. Known.One &= KnownSub.One;
  807. Known.Zero &= KnownSub.Zero;
  808. }
  809. if (!!BaseElts) {
  810. Known.One &= KnownBase.One;
  811. Known.Zero &= KnownBase.Zero;
  812. }
  813. break;
  814. }
  815. case ISD::EXTRACT_SUBVECTOR: {
  816. // If index isn't constant, assume we need all the source vector elements.
  817. SDValue Src = Op.getOperand(0);
  818. ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  819. unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
  820. APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
  821. if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
  822. // Offset the demanded elts by the subvector index.
  823. uint64_t Idx = SubIdx->getZExtValue();
  824. SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
  825. }
  826. if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
  827. return true;
  828. break;
  829. }
  case ISD::CONCAT_VECTORS: {
    // Concatenation: simplify each operand against its slice of the demanded
    // element mask, and intersect the known bits of every demanded slice.
    // Start from "everything known" and narrow as operands contribute.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      // Slice out the demanded-element mask corresponding to operand i.
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts) {
        Known.One &= Known2.One;
        Known.Zero &= Known2.Zero;
      }
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands: map each demanded
    // result lane through the mask onto the LHS (indices [0, NumElts)) or
    // RHS (indices [NumElts, 2*NumElts)) input.
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;
      int M = ShuffleMask[i];
      if (M < 0) {
        // For UNDEF elements, we don't know anything about the common state of
        // the shuffle result, so give up on tracking either operand.
        DemandedLHS.clearAllBits();
        DemandedRHS.clearAllBits();
        break;
      }
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }
    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);
      // Intersect known bits from whichever operands are actually demanded.
      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known.One &= Known2.One;
        Known.Zero &= Known2.Zero;
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known.One &= Known2.One;
        Known.Zero &= Known2.Zero;
      }
      // Attempt to avoid multi-use ops if we don't need anything from them:
      // if either operand has a simpler form for just these demanded
      // bits/elements, rebuild the shuffle on the simplified operands.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // Simplify the RHS first, then the LHS using the RHS's known-zero bits
    // (bits known zero on the RHS are not demanded from the LHS).
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Output known-1 bits are only known if set in both the LHS & RHS.
    Known.One &= Known2.One;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    Known.Zero |= Known2.Zero;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Simplify the RHS first, then the LHS using the RHS's known-one bits
    // (bits known one on the RHS are not demanded from the LHS).
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    Known.Zero &= Known2.Zero;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    Known.One |= Known2.One;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Both operands demand the same bits for XOR.
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // (but not both) turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    // Compute the resulting known bits into KnownOut (declared earlier in the
    // enclosing function) so the constant folds below can still read the
    // operand-level Known/Known2.
    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);

    if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
      // If one side is a constant, and all of the known set bits on the other
      // side are also set in the constant, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnesValue()) {
        if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
          // We're flipping all demanded bits. Flip the undemanded bits too.
          SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
          return TLO.CombineTo(Op, New);
        }
        // If we can't turn this into a 'not', try to shrink the constant.
        if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
          return true;
      }
    }

    Known = std::move(KnownOut);
    break;
  }
  case ISD::SELECT:
    // Either arm may be chosen, so simplify both against the same demanded
    // bits and keep only bits known in common.
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    break;
  case ISD::SELECT_CC:
    // Same as SELECT, but the two result values are operands 2 and 3
    // (operands 0/1 are the compared values).
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(VT) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    // A 0/1 boolean setcc has all bits above bit 0 known zero.
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Only a (splat-)constant shift amount is handled here.
    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (ConstantSDNode *SA2 =
                  isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
            if (SA2->getAPIntValue().ult(BitWidth)) {
              unsigned C1 = SA2->getZExtValue();
              // Net shift is |ShAmt - C1| in the direction of the larger
              // amount.
              unsigned Opc = ISD::SHL;
              int Diff = ShAmt - C1;
              if (Diff < 0) {
                Diff = -Diff;
                Opc = ISD::SRL;
              }
              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
            }
          }
        }
      }

      // Only the bits that end up in demanded positions are demanded from
      // the shifted value.
      if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
                               Known, TLO, Depth + 1))
        return true;

      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if ((ShAmt < DemandedBits.getActiveBits()) &&
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          // Compute the correct shift amount type for the narrow shift;
          // fall back to the inner type if ShAmt doesn't fit.
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }
        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (ConstantSDNode *SA2 =
                  isConstOrConstSplat(InnerOp.getOperand(1))) {
            unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countTrailingZeros() >= ShAmt) {
              SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
                                                  Op1.getValueType());
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      // Shift the operand's known bits into position.
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);
    }
    break;
  }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Only a (splat-)constant shift amount is handled here.
    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      EVT ShiftVT = Op1.getValueType();
      // The demanded bits of the source are the demanded result bits moved
      // back up to their pre-shift positions.
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SHL) {
        if (ConstantSDNode *SA2 =
                isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
          if (!DemandedBits.intersects(
                  APInt::getHighBitsSet(BitWidth, ShAmt))) {
            if (SA2->getAPIntValue().ult(BitWidth)) {
              unsigned C1 = SA2->getZExtValue();
              // Net shift is |ShAmt - C1| in the direction of the larger
              // amount.
              unsigned Opc = ISD::SRL;
              int Diff = ShAmt - C1;
              if (Diff < 0) {
                Diff = -Diff;
                Opc = ISD::SHL;
              }
              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
            }
          }
        }
      }

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      Known.Zero.setHighBits(ShAmt); // High bits known zero.
    }
    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOneValue())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // The demanded bits of the source are the demanded result bits moved
      // back up to their pre-shift positions.
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      // If only a single bit is demanded, that bit must be a copy of the sign
      // bit, so replace the SRA with an SRL that extracts the sign bit.
      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA =
            TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    // Funnel shifts: the result is built from the top of Op0 and the bottom
    // of Op1 (see per-opcode formulas below).
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    // Only a (splat-)constant shift amount is handled; it wraps modulo the
    // bit width.
    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
      // Map the demanded result bits back onto each operand accordingly.
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      // Shift each operand's known bits into result position and merge; the
      // two contributions occupy disjoint bit ranges.
      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.One |= Known2.One;
      Known.Zero |= Known2.Zero;
    }
    break;
  }
  case ISD::BITREVERSE: {
    // Bit reversal: demanded bits and known bits simply mirror.
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    // Replace sext_inreg with a left shift that moves the in-register sign
    // bit into the top position.
    if (DemandedBits.isSignMask()) {
      unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
      bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        EVT ShiftAmtTy = VT;
        if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);

        SDValue ShiftAmt =
            TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(
          Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    // BUILD_PAIR glues two half-width values into one: operand 0 is the low
    // half, operand 1 the high half. Split the demanded mask accordingly.
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    // Recombine the two halves' known bits into full width.
    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);

    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
  case ISD::ZERO_EXTEND:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          TLO.DAG.getDataLayout().isLittleEndian())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Narrow the demanded bits/elements to the source's width/count.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    // The zero-extended high bits are known zero.
    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          TLO.DAG.getDataLayout().isLittleEndian())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Narrow the demanded bits/elements to the source's width/count.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }
    break;
  }
  case ISD::ANY_EXTEND:
  case ISD::ANY_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;

    // If we only need the bottom element then we can just bitcast.
    // TODO: Handle ANY_EXTEND?
    if (IsVecInReg && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits() &&
        TLO.DAG.getDataLayout().isLittleEndian())
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

    // Narrow the demanded bits/elements to the source's width/count.
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    // The extended high bits have unknown values (any-extend).
    Known = Known.zext(BitWidth, false /* => any extend */);
    break;
  }
  1543. case ISD::TRUNCATE: {
  1544. SDValue Src = Op.getOperand(0);
  1545. // Simplify the input, using demanded bit information, and compute the known
  1546. // zero/one bits live out.
  1547. unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
  1548. APInt TruncMask = DemandedBits.zext(OperandBitWidth);
  1549. if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
  1550. return true;
  1551. Known = Known.trunc(BitWidth);
  1552. // Attempt to avoid multi-use ops if we don't need anything from them.
  1553. if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
  1554. Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
  1555. return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
  1556. // If the input is only used by this truncate, see if we can shrink it based
  1557. // on the known demanded bits.
  1558. if (Src.getNode()->hasOneUse()) {
  1559. switch (Src.getOpcode()) {
  1560. default:
  1561. break;
  1562. case ISD::SRL:
  1563. // Shrink SRL by a constant if none of the high bits shifted in are
  1564. // demanded.
  1565. if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
  1566. // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
  1567. // undesirable.
  1568. break;
  1569. auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
  1570. if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
  1571. break;
  1572. SDValue Shift = Src.getOperand(1);
  1573. uint64_t ShVal = ShAmt->getZExtValue();
  1574. if (TLO.LegalTypes())
  1575. Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
  1576. APInt HighBits =
  1577. APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
  1578. HighBits.lshrInPlace(ShVal);
  1579. HighBits = HighBits.trunc(BitWidth);
  1580. if (!(HighBits & DemandedBits)) {
  1581. // None of the shifted in bits are needed. Add a truncate of the
  1582. // shift input, then shift it.
  1583. SDValue NewTrunc =
  1584. TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
  1585. return TLO.CombineTo(
  1586. Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
  1587. }
  1588. break;
  1589. }
  1590. }
  1591. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  1592. break;
  1593. }
  1594. case ISD::AssertZext: {
  1595. // AssertZext demands all of the high bits, plus any of the low bits
  1596. // demanded by its users.
  1597. EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
  1598. APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
  1599. if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
  1600. TLO, Depth + 1))
  1601. return true;
  1602. assert(!Known.hasConflict() && "Bits known to be one AND zero?");
  1603. Known.Zero |= ~InMask;
  1604. break;
  1605. }
  1606. case ISD::EXTRACT_VECTOR_ELT: {
  1607. SDValue Src = Op.getOperand(0);
  1608. SDValue Idx = Op.getOperand(1);
  1609. unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
  1610. unsigned EltBitWidth = Src.getScalarValueSizeInBits();
  1611. // Demand the bits from every vector element without a constant index.
  1612. APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
  1613. if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
  1614. if (CIdx->getAPIntValue().ult(NumSrcElts))
  1615. DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
  1616. // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
  1617. // anything about the extended bits.
  1618. APInt DemandedSrcBits = DemandedBits;
  1619. if (BitWidth > EltBitWidth)
  1620. DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
  1621. if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
  1622. Depth + 1))
  1623. return true;
  1624. Known = Known2;
  1625. if (BitWidth > EltBitWidth)
  1626. Known = Known.zext(BitWidth, false /* => any extend */);
  1627. break;
  1628. }
  1629. case ISD::BITCAST: {
  1630. SDValue Src = Op.getOperand(0);
  1631. EVT SrcVT = Src.getValueType();
  1632. unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
  1633. // If this is an FP->Int bitcast and if the sign bit is the only
  1634. // thing demanded, turn this into a FGETSIGN.
  1635. if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
  1636. DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
  1637. SrcVT.isFloatingPoint()) {
  1638. bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
  1639. bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
  1640. if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
  1641. SrcVT != MVT::f128) {
  1642. // Cannot eliminate/lower SHL for f128 yet.
  1643. EVT Ty = OpVTLegal ? VT : MVT::i32;
  1644. // Make a FGETSIGN + SHL to move the sign bit into the appropriate
  1645. // place. We expect the SHL to be eliminated by other optimizations.
  1646. SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
  1647. unsigned OpVTSizeInBits = Op.getValueSizeInBits();
  1648. if (!OpVTLegal && OpVTSizeInBits > 32)
  1649. Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
  1650. unsigned ShVal = Op.getValueSizeInBits() - 1;
  1651. SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
  1652. return TLO.CombineTo(Op,
  1653. TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
  1654. }
  1655. }
  1656. // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
  1657. // Demand the elt/bit if any of the original elts/bits are demanded.
  1658. // TODO - bigendian once we have test coverage.
  1659. if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
  1660. TLO.DAG.getDataLayout().isLittleEndian()) {
  1661. unsigned Scale = BitWidth / NumSrcEltBits;
  1662. unsigned NumSrcElts = SrcVT.getVectorNumElements();
  1663. APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
  1664. APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
  1665. for (unsigned i = 0; i != Scale; ++i) {
  1666. unsigned Offset = i * NumSrcEltBits;
  1667. APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
  1668. if (!Sub.isNullValue()) {
  1669. DemandedSrcBits |= Sub;
  1670. for (unsigned j = 0; j != NumElts; ++j)
  1671. if (DemandedElts[j])
  1672. DemandedSrcElts.setBit((j * Scale) + i);
  1673. }
  1674. }
  1675. APInt KnownSrcUndef, KnownSrcZero;
  1676. if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
  1677. KnownSrcZero, TLO, Depth + 1))
  1678. return true;
  1679. KnownBits KnownSrcBits;
  1680. if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
  1681. KnownSrcBits, TLO, Depth + 1))
  1682. return true;
  1683. } else if ((NumSrcEltBits % BitWidth) == 0 &&
  1684. TLO.DAG.getDataLayout().isLittleEndian()) {
  1685. unsigned Scale = NumSrcEltBits / BitWidth;
  1686. unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  1687. APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
  1688. APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
  1689. for (unsigned i = 0; i != NumElts; ++i)
  1690. if (DemandedElts[i]) {
  1691. unsigned Offset = (i % Scale) * BitWidth;
  1692. DemandedSrcBits.insertBits(DemandedBits, Offset);
  1693. DemandedSrcElts.setBit(i / Scale);
  1694. }
  1695. if (SrcVT.isVector()) {
  1696. APInt KnownSrcUndef, KnownSrcZero;
  1697. if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
  1698. KnownSrcZero, TLO, Depth + 1))
  1699. return true;
  1700. }
  1701. KnownBits KnownSrcBits;
  1702. if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
  1703. KnownSrcBits, TLO, Depth + 1))
  1704. return true;
  1705. }
  1706. // If this is a bitcast, let computeKnownBits handle it. Only do this on a
  1707. // recursive call where Known may be useful to the caller.
  1708. if (Depth > 0) {
  1709. Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
  1710. return false;
  1711. }
  1712. break;
  1713. }
  1714. case ISD::ADD:
  1715. case ISD::MUL:
  1716. case ISD::SUB: {
  1717. // Add, Sub, and Mul don't demand any bits in positions beyond that
  1718. // of the highest bit demanded of them.
  1719. SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
  1720. SDNodeFlags Flags = Op.getNode()->getFlags();
  1721. unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
  1722. APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
  1723. if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
  1724. Depth + 1) ||
  1725. SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
  1726. Depth + 1) ||
  1727. // See if the operation should be performed at a smaller bit width.
  1728. ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
  1729. if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
  1730. // Disable the nsw and nuw flags. We can no longer guarantee that we
  1731. // won't wrap after simplification.
  1732. Flags.setNoSignedWrap(false);
  1733. Flags.setNoUnsignedWrap(false);
  1734. SDValue NewOp =
  1735. TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
  1736. return TLO.CombineTo(Op, NewOp);
  1737. }
  1738. return true;
  1739. }
  1740. // Attempt to avoid multi-use ops if we don't need anything from them.
  1741. if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
  1742. SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
  1743. Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
  1744. SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
  1745. Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
  1746. if (DemandedOp0 || DemandedOp1) {
  1747. Flags.setNoSignedWrap(false);
  1748. Flags.setNoUnsignedWrap(false);
  1749. Op0 = DemandedOp0 ? DemandedOp0 : Op0;
  1750. Op1 = DemandedOp1 ? DemandedOp1 : Op1;
  1751. SDValue NewOp =
  1752. TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
  1753. return TLO.CombineTo(Op, NewOp);
  1754. }
  1755. }
  1756. // If we have a constant operand, we may be able to turn it into -1 if we
  1757. // do not demand the high bits. This can make the constant smaller to
  1758. // encode, allow more general folding, or match specialized instruction
  1759. // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
  1760. // is probably not useful (and could be detrimental).
  1761. ConstantSDNode *C = isConstOrConstSplat(Op1);
  1762. APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
  1763. if (C && !C->isAllOnesValue() && !C->isOne() &&
  1764. (C->getAPIntValue() | HighMask).isAllOnesValue()) {
  1765. SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
  1766. // Disable the nsw and nuw flags. We can no longer guarantee that we
  1767. // won't wrap after simplification.
  1768. Flags.setNoSignedWrap(false);
  1769. Flags.setNoUnsignedWrap(false);
  1770. SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
  1771. return TLO.CombineTo(Op, NewOp);
  1772. }
  1773. LLVM_FALLTHROUGH;
  1774. }
  1775. default:
  1776. if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
  1777. if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
  1778. Known, TLO, Depth))
  1779. return true;
  1780. break;
  1781. }
  1782. // Just use computeKnownBits to compute output bits.
  1783. Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
  1784. break;
  1785. }
  1786. // If we know the value of all of the demanded bits, return this as a
  1787. // constant.
  1788. if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
  1789. // Avoid folding to a constant if any OpaqueConstant is involved.
  1790. const SDNode *N = Op.getNode();
  1791. for (SDNodeIterator I = SDNodeIterator::begin(N),
  1792. E = SDNodeIterator::end(N);
  1793. I != E; ++I) {
  1794. SDNode *Op = *I;
  1795. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
  1796. if (C->isOpaque())
  1797. return false;
  1798. }
  1799. // TODO: Handle float bits as well.
  1800. if (VT.isInteger())
  1801. return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
  1802. }
  1803. return false;
  1804. }
  1805. bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
  1806. const APInt &DemandedElts,
  1807. APInt &KnownUndef,
  1808. APInt &KnownZero,
  1809. DAGCombinerInfo &DCI) const {
  1810. SelectionDAG &DAG = DCI.DAG;
  1811. TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
  1812. !DCI.isBeforeLegalizeOps());
  1813. bool Simplified =
  1814. SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
  1815. if (Simplified) {
  1816. DCI.AddToWorklist(Op.getNode());
  1817. DCI.CommitTargetLoweringOpt(TLO);
  1818. }
  1819. return Simplified;
  1820. }
  1821. /// Given a vector binary operation and known undefined elements for each input
  1822. /// operand, compute whether each element of the output is undefined.
  1823. static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
  1824. const APInt &UndefOp0,
  1825. const APInt &UndefOp1) {
  1826. EVT VT = BO.getValueType();
  1827. assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
  1828. "Vector binop only");
  1829. EVT EltVT = VT.getVectorElementType();
  1830. unsigned NumElts = VT.getVectorNumElements();
  1831. assert(UndefOp0.getBitWidth() == NumElts &&
  1832. UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
  1833. auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
  1834. const APInt &UndefVals) {
  1835. if (UndefVals[Index])
  1836. return DAG.getUNDEF(EltVT);
  1837. if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
  1838. // Try hard to make sure that the getNode() call is not creating temporary
  1839. // nodes. Ignore opaque integers because they do not constant fold.
  1840. SDValue Elt = BV->getOperand(Index);
  1841. auto *C = dyn_cast<ConstantSDNode>(Elt);
  1842. if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
  1843. return Elt;
  1844. }
  1845. return SDValue();
  1846. };
  1847. APInt KnownUndef = APInt::getNullValue(NumElts);
  1848. for (unsigned i = 0; i != NumElts; ++i) {
  1849. // If both inputs for this element are either constant or undef and match
  1850. // the element type, compute the constant/undef result for this element of
  1851. // the vector.
  1852. // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
  1853. // not handle FP constants. The code within getNode() should be refactored
  1854. // to avoid the danger of creating a bogus temporary node here.
  1855. SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
  1856. SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
  1857. if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
  1858. if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
  1859. KnownUndef.setBit(i);
  1860. }
  1861. return KnownUndef;
  1862. }
  1863. bool TargetLowering::SimplifyDemandedVectorElts(
  1864. SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
  1865. APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
  1866. bool AssumeSingleUse) const {
  1867. EVT VT = Op.getValueType();
  1868. APInt DemandedElts = OriginalDemandedElts;
  1869. unsigned NumElts = DemandedElts.getBitWidth();
  1870. assert(VT.isVector() && "Expected vector op");
  1871. assert(VT.getVectorNumElements() == NumElts &&
  1872. "Mask size mismatches value type element count!");
  1873. KnownUndef = KnownZero = APInt::getNullValue(NumElts);
  1874. // Undef operand.
  1875. if (Op.isUndef()) {
  1876. KnownUndef.setAllBits();
  1877. return false;
  1878. }
  1879. // If Op has other users, assume that all elements are needed.
  1880. if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
  1881. DemandedElts.setAllBits();
  1882. // Not demanding any elements from Op.
  1883. if (DemandedElts == 0) {
  1884. KnownUndef.setAllBits();
  1885. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  1886. }
  1887. // Limit search depth.
  1888. if (Depth >= 6)
  1889. return false;
  1890. SDLoc DL(Op);
  1891. unsigned EltSizeInBits = VT.getScalarSizeInBits();
  1892. switch (Op.getOpcode()) {
  1893. case ISD::SCALAR_TO_VECTOR: {
  1894. if (!DemandedElts[0]) {
  1895. KnownUndef.setAllBits();
  1896. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  1897. }
  1898. KnownUndef.setHighBits(NumElts - 1);
  1899. break;
  1900. }
  1901. case ISD::BITCAST: {
  1902. SDValue Src = Op.getOperand(0);
  1903. EVT SrcVT = Src.getValueType();
  1904. // We only handle vectors here.
  1905. // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
  1906. if (!SrcVT.isVector())
  1907. break;
  1908. // Fast handling of 'identity' bitcasts.
  1909. unsigned NumSrcElts = SrcVT.getVectorNumElements();
  1910. if (NumSrcElts == NumElts)
  1911. return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
  1912. KnownZero, TLO, Depth + 1);
  1913. APInt SrcZero, SrcUndef;
  1914. APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
  1915. // Bitcast from 'large element' src vector to 'small element' vector, we
  1916. // must demand a source element if any DemandedElt maps to it.
  1917. if ((NumElts % NumSrcElts) == 0) {
  1918. unsigned Scale = NumElts / NumSrcElts;
  1919. for (unsigned i = 0; i != NumElts; ++i)
  1920. if (DemandedElts[i])
  1921. SrcDemandedElts.setBit(i / Scale);
  1922. if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
  1923. TLO, Depth + 1))
  1924. return true;
  1925. // Try calling SimplifyDemandedBits, converting demanded elts to the bits
  1926. // of the large element.
  1927. // TODO - bigendian once we have test coverage.
  1928. if (TLO.DAG.getDataLayout().isLittleEndian()) {
  1929. unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
  1930. APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
  1931. for (unsigned i = 0; i != NumElts; ++i)
  1932. if (DemandedElts[i]) {
  1933. unsigned Ofs = (i % Scale) * EltSizeInBits;
  1934. SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
  1935. }
  1936. KnownBits Known;
  1937. if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
  1938. return true;
  1939. }
  1940. // If the src element is zero/undef then all the output elements will be -
  1941. // only demanded elements are guaranteed to be correct.
  1942. for (unsigned i = 0; i != NumSrcElts; ++i) {
  1943. if (SrcDemandedElts[i]) {
  1944. if (SrcZero[i])
  1945. KnownZero.setBits(i * Scale, (i + 1) * Scale);
  1946. if (SrcUndef[i])
  1947. KnownUndef.setBits(i * Scale, (i + 1) * Scale);
  1948. }
  1949. }
  1950. }
  1951. // Bitcast from 'small element' src vector to 'large element' vector, we
  1952. // demand all smaller source elements covered by the larger demanded element
  1953. // of this vector.
  1954. if ((NumSrcElts % NumElts) == 0) {
  1955. unsigned Scale = NumSrcElts / NumElts;
  1956. for (unsigned i = 0; i != NumElts; ++i)
  1957. if (DemandedElts[i])
  1958. SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
  1959. if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
  1960. TLO, Depth + 1))
  1961. return true;
  1962. // If all the src elements covering an output element are zero/undef, then
  1963. // the output element will be as well, assuming it was demanded.
  1964. for (unsigned i = 0; i != NumElts; ++i) {
  1965. if (DemandedElts[i]) {
  1966. if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
  1967. KnownZero.setBit(i);
  1968. if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
  1969. KnownUndef.setBit(i);
  1970. }
  1971. }
  1972. }
  1973. break;
  1974. }
  1975. case ISD::BUILD_VECTOR: {
  1976. // Check all elements and simplify any unused elements with UNDEF.
  1977. if (!DemandedElts.isAllOnesValue()) {
  1978. // Don't simplify BROADCASTS.
  1979. if (llvm::any_of(Op->op_values(),
  1980. [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
  1981. SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
  1982. bool Updated = false;
  1983. for (unsigned i = 0; i != NumElts; ++i) {
  1984. if (!DemandedElts[i] && !Ops[i].isUndef()) {
  1985. Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
  1986. KnownUndef.setBit(i);
  1987. Updated = true;
  1988. }
  1989. }
  1990. if (Updated)
  1991. return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
  1992. }
  1993. }
  1994. for (unsigned i = 0; i != NumElts; ++i) {
  1995. SDValue SrcOp = Op.getOperand(i);
  1996. if (SrcOp.isUndef()) {
  1997. KnownUndef.setBit(i);
  1998. } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
  1999. (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
  2000. KnownZero.setBit(i);
  2001. }
  2002. }
  2003. break;
  2004. }
  2005. case ISD::CONCAT_VECTORS: {
  2006. EVT SubVT = Op.getOperand(0).getValueType();
  2007. unsigned NumSubVecs = Op.getNumOperands();
  2008. unsigned NumSubElts = SubVT.getVectorNumElements();
  2009. for (unsigned i = 0; i != NumSubVecs; ++i) {
  2010. SDValue SubOp = Op.getOperand(i);
  2011. APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
  2012. APInt SubUndef, SubZero;
  2013. if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
  2014. Depth + 1))
  2015. return true;
  2016. KnownUndef.insertBits(SubUndef, i * NumSubElts);
  2017. KnownZero.insertBits(SubZero, i * NumSubElts);
  2018. }
  2019. break;
  2020. }
  2021. case ISD::INSERT_SUBVECTOR: {
  2022. if (!isa<ConstantSDNode>(Op.getOperand(2)))
  2023. break;
  2024. SDValue Base = Op.getOperand(0);
  2025. SDValue Sub = Op.getOperand(1);
  2026. EVT SubVT = Sub.getValueType();
  2027. unsigned NumSubElts = SubVT.getVectorNumElements();
  2028. const APInt &Idx = Op.getConstantOperandAPInt(2);
  2029. if (Idx.ugt(NumElts - NumSubElts))
  2030. break;
  2031. unsigned SubIdx = Idx.getZExtValue();
  2032. APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
  2033. APInt SubUndef, SubZero;
  2034. if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
  2035. Depth + 1))
  2036. return true;
  2037. APInt BaseElts = DemandedElts;
  2038. BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
  2039. // If none of the base operand elements are demanded, replace it with undef.
  2040. if (!BaseElts && !Base.isUndef())
  2041. return TLO.CombineTo(Op,
  2042. TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
  2043. TLO.DAG.getUNDEF(VT),
  2044. Op.getOperand(1),
  2045. Op.getOperand(2)));
  2046. if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
  2047. Depth + 1))
  2048. return true;
  2049. KnownUndef.insertBits(SubUndef, SubIdx);
  2050. KnownZero.insertBits(SubZero, SubIdx);
  2051. break;
  2052. }
  2053. case ISD::EXTRACT_SUBVECTOR: {
  2054. SDValue Src = Op.getOperand(0);
  2055. ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  2056. unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
  2057. if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
  2058. // Offset the demanded elts by the subvector index.
  2059. uint64_t Idx = SubIdx->getZExtValue();
  2060. APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
  2061. APInt SrcUndef, SrcZero;
  2062. if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
  2063. Depth + 1))
  2064. return true;
  2065. KnownUndef = SrcUndef.extractBits(NumElts, Idx);
  2066. KnownZero = SrcZero.extractBits(NumElts, Idx);
  2067. }
  2068. break;
  2069. }
  2070. case ISD::INSERT_VECTOR_ELT: {
  2071. SDValue Vec = Op.getOperand(0);
  2072. SDValue Scl = Op.getOperand(1);
  2073. auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  2074. // For a legal, constant insertion index, if we don't need this insertion
  2075. // then strip it, else remove it from the demanded elts.
  2076. if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
  2077. unsigned Idx = CIdx->getZExtValue();
  2078. if (!DemandedElts[Idx])
  2079. return TLO.CombineTo(Op, Vec);
  2080. APInt DemandedVecElts(DemandedElts);
  2081. DemandedVecElts.clearBit(Idx);
  2082. if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
  2083. KnownZero, TLO, Depth + 1))
  2084. return true;
  2085. KnownUndef.clearBit(Idx);
  2086. if (Scl.isUndef())
  2087. KnownUndef.setBit(Idx);
  2088. KnownZero.clearBit(Idx);
  2089. if (isNullConstant(Scl) || isNullFPConstant(Scl))
  2090. KnownZero.setBit(Idx);
  2091. break;
  2092. }
  2093. APInt VecUndef, VecZero;
  2094. if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
  2095. Depth + 1))
  2096. return true;
  2097. // Without knowing the insertion index we can't set KnownUndef/KnownZero.
  2098. break;
  2099. }
  2100. case ISD::VSELECT: {
  2101. // Try to transform the select condition based on the current demanded
  2102. // elements.
  2103. // TODO: If a condition element is undef, we can choose from one arm of the
  2104. // select (and if one arm is undef, then we can propagate that to the
  2105. // result).
  2106. // TODO - add support for constant vselect masks (see IR version of this).
  2107. APInt UnusedUndef, UnusedZero;
  2108. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
  2109. UnusedZero, TLO, Depth + 1))
  2110. return true;
  2111. // See if we can simplify either vselect operand.
  2112. APInt DemandedLHS(DemandedElts);
  2113. APInt DemandedRHS(DemandedElts);
  2114. APInt UndefLHS, ZeroLHS;
  2115. APInt UndefRHS, ZeroRHS;
  2116. if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
  2117. ZeroLHS, TLO, Depth + 1))
  2118. return true;
  2119. if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
  2120. ZeroRHS, TLO, Depth + 1))
  2121. return true;
  2122. KnownUndef = UndefLHS & UndefRHS;
  2123. KnownZero = ZeroLHS & ZeroRHS;
  2124. break;
  2125. }
  2126. case ISD::VECTOR_SHUFFLE: {
  2127. ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
  2128. // Collect demanded elements from shuffle operands..
  2129. APInt DemandedLHS(NumElts, 0);
  2130. APInt DemandedRHS(NumElts, 0);
  2131. for (unsigned i = 0; i != NumElts; ++i) {
  2132. int M = ShuffleMask[i];
  2133. if (M < 0 || !DemandedElts[i])
  2134. continue;
  2135. assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
  2136. if (M < (int)NumElts)
  2137. DemandedLHS.setBit(M);
  2138. else
  2139. DemandedRHS.setBit(M - NumElts);
  2140. }
  2141. // See if we can simplify either shuffle operand.
  2142. APInt UndefLHS, ZeroLHS;
  2143. APInt UndefRHS, ZeroRHS;
  2144. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
  2145. ZeroLHS, TLO, Depth + 1))
  2146. return true;
  2147. if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
  2148. ZeroRHS, TLO, Depth + 1))
  2149. return true;
  2150. // Simplify mask using undef elements from LHS/RHS.
  2151. bool Updated = false;
  2152. bool IdentityLHS = true, IdentityRHS = true;
  2153. SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
  2154. for (unsigned i = 0; i != NumElts; ++i) {
  2155. int &M = NewMask[i];
  2156. if (M < 0)
  2157. continue;
  2158. if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
  2159. (M >= (int)NumElts && UndefRHS[M - NumElts])) {
  2160. Updated = true;
  2161. M = -1;
  2162. }
  2163. IdentityLHS &= (M < 0) || (M == (int)i);
  2164. IdentityRHS &= (M < 0) || ((M - NumElts) == i);
  2165. }
  2166. // Update legal shuffle masks based on demanded elements if it won't reduce
  2167. // to Identity which can cause premature removal of the shuffle mask.
  2168. if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
  2169. SDValue LegalShuffle =
  2170. buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
  2171. NewMask, TLO.DAG);
  2172. if (LegalShuffle)
  2173. return TLO.CombineTo(Op, LegalShuffle);
  2174. }
  2175. // Propagate undef/zero elements from LHS/RHS.
  2176. for (unsigned i = 0; i != NumElts; ++i) {
  2177. int M = ShuffleMask[i];
  2178. if (M < 0) {
  2179. KnownUndef.setBit(i);
  2180. } else if (M < (int)NumElts) {
  2181. if (UndefLHS[M])
  2182. KnownUndef.setBit(i);
  2183. if (ZeroLHS[M])
  2184. KnownZero.setBit(i);
  2185. } else {
  2186. if (UndefRHS[M - NumElts])
  2187. KnownUndef.setBit(i);
  2188. if (ZeroRHS[M - NumElts])
  2189. KnownZero.setBit(i);
  2190. }
  2191. }
  2192. break;
  2193. }
  2194. case ISD::ANY_EXTEND_VECTOR_INREG:
  2195. case ISD::SIGN_EXTEND_VECTOR_INREG:
  2196. case ISD::ZERO_EXTEND_VECTOR_INREG: {
  2197. APInt SrcUndef, SrcZero;
  2198. SDValue Src = Op.getOperand(0);
  2199. unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
  2200. APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
  2201. if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
  2202. Depth + 1))
  2203. return true;
  2204. KnownZero = SrcZero.zextOrTrunc(NumElts);
  2205. KnownUndef = SrcUndef.zextOrTrunc(NumElts);
  2206. if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
  2207. Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
  2208. DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
  2209. // aext - if we just need the bottom element then we can bitcast.
  2210. return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
  2211. }
  2212. if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
  2213. // zext(undef) upper bits are guaranteed to be zero.
  2214. if (DemandedElts.isSubsetOf(KnownUndef))
  2215. return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
  2216. KnownUndef.clearAllBits();
  2217. }
  2218. break;
  2219. }
  2220. // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
  2221. // MAX, saturated math, etc.
  2222. case ISD::OR:
  2223. case ISD::XOR:
  2224. case ISD::ADD:
  2225. case ISD::SUB:
  2226. case ISD::FADD:
  2227. case ISD::FSUB:
  2228. case ISD::FMUL:
  2229. case ISD::FDIV:
  2230. case ISD::FREM: {
  2231. APInt UndefRHS, ZeroRHS;
  2232. if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
  2233. ZeroRHS, TLO, Depth + 1))
  2234. return true;
  2235. APInt UndefLHS, ZeroLHS;
  2236. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
  2237. ZeroLHS, TLO, Depth + 1))
  2238. return true;
  2239. KnownZero = ZeroLHS & ZeroRHS;
  2240. KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
  2241. break;
  2242. }
  2243. case ISD::SHL:
  2244. case ISD::SRL:
  2245. case ISD::SRA:
  2246. case ISD::ROTL:
  2247. case ISD::ROTR: {
  2248. APInt UndefRHS, ZeroRHS;
  2249. if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
  2250. ZeroRHS, TLO, Depth + 1))
  2251. return true;
  2252. APInt UndefLHS, ZeroLHS;
  2253. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
  2254. ZeroLHS, TLO, Depth + 1))
  2255. return true;
  2256. KnownZero = ZeroLHS;
  2257. KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
  2258. break;
  2259. }
  2260. case ISD::MUL:
  2261. case ISD::AND: {
  2262. APInt SrcUndef, SrcZero;
  2263. if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
  2264. SrcZero, TLO, Depth + 1))
  2265. return true;
  2266. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
  2267. KnownZero, TLO, Depth + 1))
  2268. return true;
  2269. // If either side has a zero element, then the result element is zero, even
  2270. // if the other is an UNDEF.
  2271. // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
  2272. // and then handle 'and' nodes with the rest of the binop opcodes.
  2273. KnownZero |= SrcZero;
  2274. KnownUndef &= SrcUndef;
  2275. KnownUndef &= ~KnownZero;
  2276. break;
  2277. }
  2278. case ISD::TRUNCATE:
  2279. case ISD::SIGN_EXTEND:
  2280. case ISD::ZERO_EXTEND:
  2281. if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
  2282. KnownZero, TLO, Depth + 1))
  2283. return true;
  2284. if (Op.getOpcode() == ISD::ZERO_EXTEND) {
  2285. // zext(undef) upper bits are guaranteed to be zero.
  2286. if (DemandedElts.isSubsetOf(KnownUndef))
  2287. return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
  2288. KnownUndef.clearAllBits();
  2289. }
  2290. break;
  2291. default: {
  2292. if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
  2293. if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
  2294. KnownZero, TLO, Depth))
  2295. return true;
  2296. } else {
  2297. KnownBits Known;
  2298. APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
  2299. if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
  2300. TLO, Depth, AssumeSingleUse))
  2301. return true;
  2302. }
  2303. break;
  2304. }
  2305. }
  2306. assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
  2307. // Constant fold all undef cases.
  2308. // TODO: Handle zero cases as well.
  2309. if (DemandedElts.isSubsetOf(KnownUndef))
  2310. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  2311. return false;
  2312. }
  2313. /// Determine which of the bits specified in Mask are known to be either zero or
  2314. /// one and return them in the Known.
  2315. void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
  2316. KnownBits &Known,
  2317. const APInt &DemandedElts,
  2318. const SelectionDAG &DAG,
  2319. unsigned Depth) const {
  2320. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2321. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2322. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2323. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2324. "Should use MaskedValueIsZero if you don't know whether Op"
  2325. " is a target node!");
  2326. Known.resetAll();
  2327. }
// Default implementation of the GlobalISel known-bits hook: report that
// nothing is known about any bit of register R. Targets override this to
// expose bit-level facts about their generic virtual registers.
void TargetLowering::computeKnownBitsForTargetInstr(
    Register R, KnownBits &Known, const APInt &DemandedElts,
    const MachineRegisterInfo &MRI, unsigned Depth) const {
  Known.resetAll();
}
  2333. void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
  2334. KnownBits &Known,
  2335. const APInt &DemandedElts,
  2336. const SelectionDAG &DAG,
  2337. unsigned Depth) const {
  2338. assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
  2339. if (unsigned Align = DAG.InferPtrAlignment(Op)) {
  2340. // The low bits are known zero if the pointer is aligned.
  2341. Known.Zero.setLowBits(Log2_32(Align));
  2342. }
  2343. }
  2344. /// This method can be implemented by targets that want to expose additional
  2345. /// information about sign bits to the DAG Combiner.
  2346. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
  2347. const APInt &,
  2348. const SelectionDAG &,
  2349. unsigned Depth) const {
  2350. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2351. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2352. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2353. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2354. "Should use ComputeNumSignBits if you don't know whether Op"
  2355. " is a target node!");
  2356. return 1;
  2357. }
  2358. bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
  2359. SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
  2360. TargetLoweringOpt &TLO, unsigned Depth) const {
  2361. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2362. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2363. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2364. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2365. "Should use SimplifyDemandedVectorElts if you don't know whether Op"
  2366. " is a target node!");
  2367. return false;
  2368. }
  2369. bool TargetLowering::SimplifyDemandedBitsForTargetNode(
  2370. SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
  2371. KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  2372. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2373. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2374. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2375. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2376. "Should use SimplifyDemandedBits if you don't know whether Op"
  2377. " is a target node!");
  2378. computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
  2379. return false;
  2380. }
  2381. SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
  2382. SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
  2383. SelectionDAG &DAG, unsigned Depth) const {
  2384. assert(
  2385. (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2386. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2387. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2388. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2389. "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
  2390. " is a target node!");
  2391. return SDValue();
  2392. }
  2393. SDValue
  2394. TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
  2395. SDValue N1, MutableArrayRef<int> Mask,
  2396. SelectionDAG &DAG) const {
  2397. bool LegalMask = isShuffleMaskLegal(Mask, VT);
  2398. if (!LegalMask) {
  2399. std::swap(N0, N1);
  2400. ShuffleVectorSDNode::commuteMask(Mask);
  2401. LegalMask = isShuffleMaskLegal(Mask, VT);
  2402. }
  2403. if (!LegalMask)
  2404. return SDValue();
  2405. return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
  2406. }
// Default implementation: the base class cannot map a load back to a target
// constant, so return nullptr; targets with constant pools override this.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
  2410. bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
  2411. const SelectionDAG &DAG,
  2412. bool SNaN,
  2413. unsigned Depth) const {
  2414. assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
  2415. Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
  2416. Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
  2417. Op.getOpcode() == ISD::INTRINSIC_VOID) &&
  2418. "Should use isKnownNeverNaN if you don't know whether Op"
  2419. " is a target node!");
  2420. return false;
  2421. }
  2422. // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
  2423. // work with truncating build vectors and vectors with elements of less than
  2424. // 8 bits.
  2425. bool TargetLowering::isConstTrueVal(const SDNode *N) const {
  2426. if (!N)
  2427. return false;
  2428. APInt CVal;
  2429. if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
  2430. CVal = CN->getAPIntValue();
  2431. } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
  2432. auto *CN = BV->getConstantSplatNode();
  2433. if (!CN)
  2434. return false;
  2435. // If this is a truncating build vector, truncate the splat value.
  2436. // Otherwise, we may fail to match the expected values below.
  2437. unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
  2438. CVal = CN->getAPIntValue();
  2439. if (BVEltWidth < CVal.getBitWidth())
  2440. CVal = CVal.trunc(BVEltWidth);
  2441. } else {
  2442. return false;
  2443. }
  2444. switch (getBooleanContents(N->getValueType(0))) {
  2445. case UndefinedBooleanContent:
  2446. return CVal[0];
  2447. case ZeroOrOneBooleanContent:
  2448. return CVal.isOneValue();
  2449. case ZeroOrNegativeOneBooleanContent:
  2450. return CVal.isAllOnesValue();
  2451. }
  2452. llvm_unreachable("Invalid boolean contents");
  2453. }
  2454. bool TargetLowering::isConstFalseVal(const SDNode *N) const {
  2455. if (!N)
  2456. return false;
  2457. const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  2458. if (!CN) {
  2459. const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
  2460. if (!BV)
  2461. return false;
  2462. // Only interested in constant splats, we don't care about undef
  2463. // elements in identifying boolean constants and getConstantSplatNode
  2464. // returns NULL if all ops are undef;
  2465. CN = BV->getConstantSplatNode();
  2466. if (!CN)
  2467. return false;
  2468. }
  2469. if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
  2470. return !CN->getAPIntValue()[0];
  2471. return CN->isNullValue();
  2472. }
  2473. bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
  2474. bool SExt) const {
  2475. if (VT == MVT::i1)
  2476. return N->isOne();
  2477. TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
  2478. switch (Cnt) {
  2479. case TargetLowering::ZeroOrOneBooleanContent:
  2480. // An extended value of 1 is always true, unless its original type is i1,
  2481. // in which case it will be sign extended to -1.
  2482. return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
  2483. case TargetLowering::UndefinedBooleanContent:
  2484. case TargetLowering::ZeroOrNegativeOneBooleanContent:
  2485. return N->isAllOnesValue() && SExt;
  2486. }
  2487. llvm_unreachable("Unexpected enumeration.");
  2488. }
  2489. /// This helper function of SimplifySetCC tries to optimize the comparison when
  2490. /// either operand of the SetCC node is a bitwise-and instruction.
  2491. SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
  2492. ISD::CondCode Cond, const SDLoc &DL,
  2493. DAGCombinerInfo &DCI) const {
  2494. // Match these patterns in any of their permutations:
  2495. // (X & Y) == Y
  2496. // (X & Y) != Y
  2497. if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
  2498. std::swap(N0, N1);
  2499. EVT OpVT = N0.getValueType();
  2500. if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
  2501. (Cond != ISD::SETEQ && Cond != ISD::SETNE))
  2502. return SDValue();
  2503. SDValue X, Y;
  2504. if (N0.getOperand(0) == N1) {
  2505. X = N0.getOperand(1);
  2506. Y = N0.getOperand(0);
  2507. } else if (N0.getOperand(1) == N1) {
  2508. X = N0.getOperand(0);
  2509. Y = N0.getOperand(1);
  2510. } else {
  2511. return SDValue();
  2512. }
  2513. SelectionDAG &DAG = DCI.DAG;
  2514. SDValue Zero = DAG.getConstant(0, DL, OpVT);
  2515. if (DAG.isKnownToBeAPowerOfTwo(Y)) {
  2516. // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
  2517. // Note that where Y is variable and is known to have at most one bit set
  2518. // (for example, if it is Z & 1) we cannot do this; the expressions are not
  2519. // equivalent when Y == 0.
  2520. Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
  2521. if (DCI.isBeforeLegalizeOps() ||
  2522. isCondCodeLegal(Cond, N0.getSimpleValueType()))
  2523. return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  2524. } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
  2525. // If the target supports an 'and-not' or 'and-complement' logic operation,
  2526. // try to use that to make a comparison operation more efficient.
  2527. // But don't do this transform if the mask is a single bit because there are
  2528. // more efficient ways to deal with that case (for example, 'bt' on x86 or
  2529. // 'rlwinm' on PPC).
  2530. // Bail out if the compare operand that we want to turn into a zero is
  2531. // already a zero (otherwise, infinite loop).
  2532. auto *YConst = dyn_cast<ConstantSDNode>(Y);
  2533. if (YConst && YConst->isNullValue())
  2534. return SDValue();
  2535. // Transform this into: ~X & Y == 0.
  2536. SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
  2537. SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
  2538. return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  2539. }
  2540. return SDValue();
  2541. }
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Canonicalize the strict/non-strict unsigned comparisons into an
  // equality/inequality against a power of two, adjusting the constant for
  // the non-strict forms (x u<= C  <=>  x u< C+1, x u> C  <=>  x u>= C+1).
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    // Negating both constants (and inverting the new predicate) covers the
    // pattern written with negative immediates, e.g. (add %x, -128) uge -256.
    I1.negate();
    I01.negate();
    NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  // The add constant must be exactly half the setcc constant, i.e.
  // 1 << (KeptBits-1), for the pattern to be a truncation check.
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);

  return T2;
}
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) &&
         isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Look for '(C l>>/<< Y)'.
  // NOTE(review): X is captured by reference and read inside the lambda, so it
  // must be assigned before Match is invoked (it is, below, before each call).
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    // The hoisted shift uses the opposite direction of the original one.
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    // Let the target decide whether the rewritten form is profitable.
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
  2684. /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
  2685. /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
  2686. /// handle the commuted versions of these patterns.
  2687. SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
  2688. ISD::CondCode Cond, const SDLoc &DL,
  2689. DAGCombinerInfo &DCI) const {
  2690. unsigned BOpcode = N0.getOpcode();
  2691. assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
  2692. "Unexpected binop");
  2693. assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
  2694. // (X + Y) == X --> Y == 0
  2695. // (X - Y) == X --> Y == 0
  2696. // (X ^ Y) == X --> Y == 0
  2697. SelectionDAG &DAG = DCI.DAG;
  2698. EVT OpVT = N0.getValueType();
  2699. SDValue X = N0.getOperand(0);
  2700. SDValue Y = N0.getOperand(1);
  2701. if (X == N1)
  2702. return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
  2703. if (Y != N1)
  2704. return SDValue();
  2705. // (X + Y) == Y --> X == 0
  2706. // (X ^ Y) == Y --> X == 0
  2707. if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
  2708. return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
  2709. // The shift would not be valid if the operands are boolean (i1).
  2710. if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
  2711. return SDValue();
  2712. // (X - Y) == Y --> X == Y << 1
  2713. EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
  2714. !DCI.isBeforeLegalize());
  2715. SDValue One = DAG.getConstant(1, DL, ShiftVT);
  2716. SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  2717. if (!DCI.isCalledByLegalizer())
  2718. DCI.AddToWorklist(YShl1.getNode());
  2719. return DAG.getSetCC(DL, VT, X, YShl1, Cond);
  2720. }
  2721. /// Try to simplify a setcc built with the specified operands and cc. If it is
  2722. /// unable to simplify it, return a null SDValue.
  2723. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
  2724. ISD::CondCode Cond, bool foldBooleans,
  2725. DAGCombinerInfo &DCI,
  2726. const SDLoc &dl) const {
  2727. SelectionDAG &DAG = DCI.DAG;
  2728. EVT OpVT = N0.getValueType();
  2729. // Constant fold or commute setcc.
  2730. if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
  2731. return Fold;
  2732. // Ensure that the constant occurs on the RHS and fold constant comparisons.
  2733. // TODO: Handle non-splat vector constants. All undef causes trouble.
  2734. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
  2735. if (isConstOrConstSplat(N0) &&
  2736. (DCI.isBeforeLegalizeOps() ||
  2737. isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
  2738. return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
  2739. // If we have a subtract with the same 2 non-constant operands as this setcc
  2740. // -- but in reverse order -- then try to commute the operands of this setcc
  2741. // to match. A matching pair of setcc (cmp) and sub may be combined into 1
  2742. // instruction on some targets.
  2743. if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
  2744. (DCI.isBeforeLegalizeOps() ||
  2745. isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
  2746. DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
  2747. !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
  2748. return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
  2749. if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
  2750. const APInt &C1 = N1C->getAPIntValue();
  2751. // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
  2752. // equality comparison, then we're just comparing whether X itself is
  2753. // zero.
  2754. if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
  2755. N0.getOperand(0).getOpcode() == ISD::CTLZ &&
  2756. N0.getOperand(1).getOpcode() == ISD::Constant) {
  2757. const APInt &ShAmt = N0.getConstantOperandAPInt(1);
  2758. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
  2759. ShAmt == Log2_32(N0.getValueSizeInBits())) {
  2760. if ((C1 == 0) == (Cond == ISD::SETEQ)) {
  2761. // (srl (ctlz x), 5) == 0 -> X != 0
  2762. // (srl (ctlz x), 5) != 1 -> X != 0
  2763. Cond = ISD::SETNE;
  2764. } else {
  2765. // (srl (ctlz x), 5) != 0 -> X == 0
  2766. // (srl (ctlz x), 5) == 1 -> X == 0
  2767. Cond = ISD::SETEQ;
  2768. }
  2769. SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
  2770. return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
  2771. Zero, Cond);
  2772. }
  2773. }
  2774. SDValue CTPOP = N0;
  2775. // Look through truncs that don't change the value of a ctpop.
  2776. if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
  2777. CTPOP = N0.getOperand(0);
  2778. if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
  2779. (N0 == CTPOP ||
  2780. N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
  2781. EVT CTVT = CTPOP.getValueType();
  2782. SDValue CTOp = CTPOP.getOperand(0);
  2783. // (ctpop x) u< 2 -> (x & x-1) == 0
  2784. // (ctpop x) u> 1 -> (x & x-1) != 0
  2785. if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
  2786. SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
  2787. SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
  2788. SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
  2789. ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
  2790. return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
  2791. }
  2792. // If ctpop is not supported, expand a power-of-2 comparison based on it.
  2793. if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
  2794. (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
  2795. // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
  2796. // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
  2797. SDValue Zero = DAG.getConstant(0, dl, CTVT);
  2798. SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
  2799. ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
  2800. SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
  2801. SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
  2802. SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
  2803. SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
  2804. unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
  2805. return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
  2806. }
  2807. }
  2808. // (zext x) == C --> x == (trunc C)
  2809. // (sext x) == C --> x == (trunc C)
  2810. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
  2811. DCI.isBeforeLegalize() && N0->hasOneUse()) {
  2812. unsigned MinBits = N0.getValueSizeInBits();
  2813. SDValue PreExt;
  2814. bool Signed = false;
  2815. if (N0->getOpcode() == ISD::ZERO_EXTEND) {
  2816. // ZExt
  2817. MinBits = N0->getOperand(0).getValueSizeInBits();
  2818. PreExt = N0->getOperand(0);
  2819. } else if (N0->getOpcode() == ISD::AND) {
  2820. // DAGCombine turns costly ZExts into ANDs
  2821. if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
  2822. if ((C->getAPIntValue()+1).isPowerOf2()) {
  2823. MinBits = C->getAPIntValue().countTrailingOnes();
  2824. PreExt = N0->getOperand(0);
  2825. }
  2826. } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
  2827. // SExt
  2828. MinBits = N0->getOperand(0).getValueSizeInBits();
  2829. PreExt = N0->getOperand(0);
  2830. Signed = true;
  2831. } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
  2832. // ZEXTLOAD / SEXTLOAD
  2833. if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
  2834. MinBits = LN0->getMemoryVT().getSizeInBits();
  2835. PreExt = N0;
  2836. } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
  2837. Signed = true;
  2838. MinBits = LN0->getMemoryVT().getSizeInBits();
  2839. PreExt = N0;
  2840. }
  2841. }
  2842. // Figure out how many bits we need to preserve this constant.
  2843. unsigned ReqdBits = Signed ?
  2844. C1.getBitWidth() - C1.getNumSignBits() + 1 :
  2845. C1.getActiveBits();
  2846. // Make sure we're not losing bits from the constant.
  2847. if (MinBits > 0 &&
  2848. MinBits < C1.getBitWidth() &&
  2849. MinBits >= ReqdBits) {
  2850. EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
  2851. if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
  2852. // Will get folded away.
  2853. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
  2854. if (MinBits == 1 && C1 == 1)
  2855. // Invert the condition.
  2856. return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
  2857. Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
  2858. SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
  2859. return DAG.getSetCC(dl, VT, Trunc, C, Cond);
  2860. }
  2861. // If truncating the setcc operands is not desirable, we can still
  2862. // simplify the expression in some cases:
  2863. // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
  2864. // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
  2865. // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
  2866. // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
  2867. // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
  2868. // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
  2869. SDValue TopSetCC = N0->getOperand(0);
  2870. unsigned N0Opc = N0->getOpcode();
  2871. bool SExt = (N0Opc == ISD::SIGN_EXTEND);
  2872. if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
  2873. TopSetCC.getOpcode() == ISD::SETCC &&
  2874. (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
  2875. (isConstFalseVal(N1C) ||
  2876. isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
  2877. bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
  2878. (!N1C->isNullValue() && Cond == ISD::SETNE);
  2879. if (!Inverse)
  2880. return TopSetCC;
  2881. ISD::CondCode InvCond = ISD::getSetCCInverse(
  2882. cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
  2883. TopSetCC.getOperand(0).getValueType().isInteger());
  2884. return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
  2885. TopSetCC.getOperand(1),
  2886. InvCond);
  2887. }
  2888. }
  2889. }
  2890. // If the LHS is '(and load, const)', the RHS is 0, the test is for
  2891. // equality or unsigned, and all 1 bits of the const are in the same
  2892. // partial word, see if we can shorten the load.
  2893. if (DCI.isBeforeLegalize() &&
  2894. !ISD::isSignedIntSetCC(Cond) &&
  2895. N0.getOpcode() == ISD::AND && C1 == 0 &&
  2896. N0.getNode()->hasOneUse() &&
  2897. isa<LoadSDNode>(N0.getOperand(0)) &&
  2898. N0.getOperand(0).getNode()->hasOneUse() &&
  2899. isa<ConstantSDNode>(N0.getOperand(1))) {
  2900. LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
  2901. APInt bestMask;
  2902. unsigned bestWidth = 0, bestOffset = 0;
  2903. if (Lod->isSimple() && Lod->isUnindexed()) {
  2904. unsigned origWidth = N0.getValueSizeInBits();
  2905. unsigned maskWidth = origWidth;
  2906. // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
  2907. // 8 bits, but have to be careful...
  2908. if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
  2909. origWidth = Lod->getMemoryVT().getSizeInBits();
  2910. const APInt &Mask = N0.getConstantOperandAPInt(1);
  2911. for (unsigned width = origWidth / 2; width>=8; width /= 2) {
  2912. APInt newMask = APInt::getLowBitsSet(maskWidth, width);
  2913. for (unsigned offset=0; offset<origWidth/width; offset++) {
  2914. if (Mask.isSubsetOf(newMask)) {
  2915. if (DAG.getDataLayout().isLittleEndian())
  2916. bestOffset = (uint64_t)offset * (width/8);
  2917. else
  2918. bestOffset = (origWidth/width - offset - 1) * (width/8);
  2919. bestMask = Mask.lshr(offset * (width/8) * 8);
  2920. bestWidth = width;
  2921. break;
  2922. }
  2923. newMask <<= width;
  2924. }
  2925. }
  2926. }
  2927. if (bestWidth) {
  2928. EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
  2929. if (newVT.isRound() &&
  2930. shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
  2931. EVT PtrType = Lod->getOperand(1).getValueType();
  2932. SDValue Ptr = Lod->getBasePtr();
  2933. if (bestOffset != 0)
  2934. Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
  2935. DAG.getConstant(bestOffset, dl, PtrType));
  2936. unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
  2937. SDValue NewLoad = DAG.getLoad(
  2938. newVT, dl, Lod->getChain(), Ptr,
  2939. Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
  2940. return DAG.getSetCC(dl, VT,
  2941. DAG.getNode(ISD::AND, dl, newVT, NewLoad,
  2942. DAG.getConstant(bestMask.trunc(bestWidth),
  2943. dl, newVT)),
  2944. DAG.getConstant(0LL, dl, newVT), Cond);
  2945. }
  2946. }
  2947. }
  2948. // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
  2949. if (N0.getOpcode() == ISD::ZERO_EXTEND) {
  2950. unsigned InSize = N0.getOperand(0).getValueSizeInBits();
  2951. // If the comparison constant has bits in the upper part, the
  2952. // zero-extended value could never match.
  2953. if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
  2954. C1.getBitWidth() - InSize))) {
  2955. switch (Cond) {
  2956. case ISD::SETUGT:
  2957. case ISD::SETUGE:
  2958. case ISD::SETEQ:
  2959. return DAG.getConstant(0, dl, VT);
  2960. case ISD::SETULT:
  2961. case ISD::SETULE:
  2962. case ISD::SETNE:
  2963. return DAG.getConstant(1, dl, VT);
  2964. case ISD::SETGT:
  2965. case ISD::SETGE:
  2966. // True if the sign bit of C1 is set.
  2967. return DAG.getConstant(C1.isNegative(), dl, VT);
  2968. case ISD::SETLT:
  2969. case ISD::SETLE:
  2970. // True if the sign bit of C1 isn't set.
  2971. return DAG.getConstant(C1.isNonNegative(), dl, VT);
  2972. default:
  2973. break;
  2974. }
  2975. }
  2976. // Otherwise, we can perform the comparison with the low bits.
  2977. switch (Cond) {
  2978. case ISD::SETEQ:
  2979. case ISD::SETNE:
  2980. case ISD::SETUGT:
  2981. case ISD::SETUGE:
  2982. case ISD::SETULT:
  2983. case ISD::SETULE: {
  2984. EVT newVT = N0.getOperand(0).getValueType();
  2985. if (DCI.isBeforeLegalizeOps() ||
  2986. (isOperationLegal(ISD::SETCC, newVT) &&
  2987. isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
  2988. EVT NewSetCCVT =
  2989. getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
  2990. SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
  2991. SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
  2992. NewConst, Cond);
  2993. return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
  2994. }
  2995. break;
  2996. }
  2997. default:
  2998. break; // todo, be more careful with signed comparisons
  2999. }
  3000. } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
  3001. (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
  3002. EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
  3003. unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
  3004. EVT ExtDstTy = N0.getValueType();
  3005. unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
  3006. // If the constant doesn't fit into the number of bits for the source of
  3007. // the sign extension, it is impossible for both sides to be equal.
  3008. if (C1.getMinSignedBits() > ExtSrcTyBits)
  3009. return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
  3010. SDValue ZextOp;
  3011. EVT Op0Ty = N0.getOperand(0).getValueType();
  3012. if (Op0Ty == ExtSrcTy) {
  3013. ZextOp = N0.getOperand(0);
  3014. } else {
  3015. APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
  3016. ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
  3017. DAG.getConstant(Imm, dl, Op0Ty));
  3018. }
  3019. if (!DCI.isCalledByLegalizer())
  3020. DCI.AddToWorklist(ZextOp.getNode());
  3021. // Otherwise, make this a use of a zext.
  3022. return DAG.getSetCC(dl, VT, ZextOp,
  3023. DAG.getConstant(C1 & APInt::getLowBitsSet(
  3024. ExtDstTyBits,
  3025. ExtSrcTyBits),
  3026. dl, ExtDstTy),
  3027. Cond);
  3028. } else if ((N1C->isNullValue() || N1C->isOne()) &&
  3029. (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
  3030. // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
  3031. if (N0.getOpcode() == ISD::SETCC &&
  3032. isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
  3033. bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
  3034. if (TrueWhenTrue)
  3035. return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
  3036. // Invert the condition.
  3037. ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
  3038. CC = ISD::getSetCCInverse(CC,
  3039. N0.getOperand(0).getValueType().isInteger());
  3040. if (DCI.isBeforeLegalizeOps() ||
  3041. isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
  3042. return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
  3043. }
  3044. if ((N0.getOpcode() == ISD::XOR ||
  3045. (N0.getOpcode() == ISD::AND &&
  3046. N0.getOperand(0).getOpcode() == ISD::XOR &&
  3047. N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
  3048. isa<ConstantSDNode>(N0.getOperand(1)) &&
  3049. cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
  3050. // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
  3051. // can only do this if the top bits are known zero.
  3052. unsigned BitWidth = N0.getValueSizeInBits();
  3053. if (DAG.MaskedValueIsZero(N0,
  3054. APInt::getHighBitsSet(BitWidth,
  3055. BitWidth-1))) {
  3056. // Okay, get the un-inverted input value.
  3057. SDValue Val;
  3058. if (N0.getOpcode() == ISD::XOR) {
  3059. Val = N0.getOperand(0);
  3060. } else {
  3061. assert(N0.getOpcode() == ISD::AND &&
  3062. N0.getOperand(0).getOpcode() == ISD::XOR);
  3063. // ((X^1)&1)^1 -> X & 1
  3064. Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
  3065. N0.getOperand(0).getOperand(0),
  3066. N0.getOperand(1));
  3067. }
  3068. return DAG.getSetCC(dl, VT, Val, N1,
  3069. Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
  3070. }
  3071. } else if (N1C->isOne() &&
  3072. (VT == MVT::i1 ||
  3073. getBooleanContents(N0->getValueType(0)) ==
  3074. ZeroOrOneBooleanContent)) {
  3075. SDValue Op0 = N0;
  3076. if (Op0.getOpcode() == ISD::TRUNCATE)
  3077. Op0 = Op0.getOperand(0);
  3078. if ((Op0.getOpcode() == ISD::XOR) &&
  3079. Op0.getOperand(0).getOpcode() == ISD::SETCC &&
  3080. Op0.getOperand(1).getOpcode() == ISD::SETCC) {
  3081. // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
  3082. Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
  3083. return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
  3084. Cond);
  3085. }
  3086. if (Op0.getOpcode() == ISD::AND &&
  3087. isa<ConstantSDNode>(Op0.getOperand(1)) &&
  3088. cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
  3089. // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
  3090. if (Op0.getValueType().bitsGT(VT))
  3091. Op0 = DAG.getNode(ISD::AND, dl, VT,
  3092. DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
  3093. DAG.getConstant(1, dl, VT));
  3094. else if (Op0.getValueType().bitsLT(VT))
  3095. Op0 = DAG.getNode(ISD::AND, dl, VT,
  3096. DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
  3097. DAG.getConstant(1, dl, VT));
  3098. return DAG.getSetCC(dl, VT, Op0,
  3099. DAG.getConstant(0, dl, Op0.getValueType()),
  3100. Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
  3101. }
  3102. if (Op0.getOpcode() == ISD::AssertZext &&
  3103. cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
  3104. return DAG.getSetCC(dl, VT, Op0,
  3105. DAG.getConstant(0, dl, Op0.getValueType()),
  3106. Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
  3107. }
  3108. }
  3109. // Given:
  3110. // icmp eq/ne (urem %x, %y), 0
  3111. // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
  3112. // icmp eq/ne %x, 0
  3113. if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
  3114. (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
  3115. KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
  3116. KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
  3117. if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
  3118. return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
  3119. }
  3120. if (SDValue V =
  3121. optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
  3122. return V;
  3123. }
  3124. // These simplifications apply to splat vectors as well.
  3125. // TODO: Handle more splat vector cases.
  3126. if (auto *N1C = isConstOrConstSplat(N1)) {
  3127. const APInt &C1 = N1C->getAPIntValue();
  3128. APInt MinVal, MaxVal;
  3129. unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
  3130. if (ISD::isSignedIntSetCC(Cond)) {
  3131. MinVal = APInt::getSignedMinValue(OperandBitSize);
  3132. MaxVal = APInt::getSignedMaxValue(OperandBitSize);
  3133. } else {
  3134. MinVal = APInt::getMinValue(OperandBitSize);
  3135. MaxVal = APInt::getMaxValue(OperandBitSize);
  3136. }
  3137. // Canonicalize GE/LE comparisons to use GT/LT comparisons.
  3138. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
  3139. // X >= MIN --> true
  3140. if (C1 == MinVal)
  3141. return DAG.getBoolConstant(true, dl, VT, OpVT);
  3142. if (!VT.isVector()) { // TODO: Support this for vectors.
  3143. // X >= C0 --> X > (C0 - 1)
  3144. APInt C = C1 - 1;
  3145. ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
  3146. if ((DCI.isBeforeLegalizeOps() ||
  3147. isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
  3148. (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
  3149. isLegalICmpImmediate(C.getSExtValue())))) {
  3150. return DAG.getSetCC(dl, VT, N0,
  3151. DAG.getConstant(C, dl, N1.getValueType()),
  3152. NewCC);
  3153. }
  3154. }
  3155. }
  3156. if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
  3157. // X <= MAX --> true
  3158. if (C1 == MaxVal)
  3159. return DAG.getBoolConstant(true, dl, VT, OpVT);
  3160. // X <= C0 --> X < (C0 + 1)
  3161. if (!VT.isVector()) { // TODO: Support this for vectors.
  3162. APInt C = C1 + 1;
  3163. ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
  3164. if ((DCI.isBeforeLegalizeOps() ||
  3165. isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
  3166. (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
  3167. isLegalICmpImmediate(C.getSExtValue())))) {
  3168. return DAG.getSetCC(dl, VT, N0,
  3169. DAG.getConstant(C, dl, N1.getValueType()),
  3170. NewCC);
  3171. }
  3172. }
  3173. }
  3174. if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
  3175. if (C1 == MinVal)
  3176. return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
  3177. // TODO: Support this for vectors after legalize ops.
  3178. if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
  3179. // Canonicalize setlt X, Max --> setne X, Max
  3180. if (C1 == MaxVal)
  3181. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
  3182. // If we have setult X, 1, turn it into seteq X, 0
  3183. if (C1 == MinVal+1)
  3184. return DAG.getSetCC(dl, VT, N0,
  3185. DAG.getConstant(MinVal, dl, N0.getValueType()),
  3186. ISD::SETEQ);
  3187. }
  3188. }
  3189. if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
  3190. if (C1 == MaxVal)
  3191. return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
  3192. // TODO: Support this for vectors after legalize ops.
  3193. if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
  3194. // Canonicalize setgt X, Min --> setne X, Min
  3195. if (C1 == MinVal)
  3196. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
  3197. // If we have setugt X, Max-1, turn it into seteq X, Max
  3198. if (C1 == MaxVal-1)
  3199. return DAG.getSetCC(dl, VT, N0,
  3200. DAG.getConstant(MaxVal, dl, N0.getValueType()),
  3201. ISD::SETEQ);
  3202. }
  3203. }
  3204. if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
  3205. // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
  3206. if (C1.isNullValue())
  3207. if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
  3208. VT, N0, N1, Cond, DCI, dl))
  3209. return CC;
  3210. }
  3211. // If we have "setcc X, C0", check to see if we can shrink the immediate
  3212. // by changing cc.
  3213. // TODO: Support this for vectors after legalize ops.
  3214. if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
  3215. // SETUGT X, SINTMAX -> SETLT X, 0
  3216. if (Cond == ISD::SETUGT &&
  3217. C1 == APInt::getSignedMaxValue(OperandBitSize))
  3218. return DAG.getSetCC(dl, VT, N0,
  3219. DAG.getConstant(0, dl, N1.getValueType()),
  3220. ISD::SETLT);
  3221. // SETULT X, SINTMIN -> SETGT X, -1
  3222. if (Cond == ISD::SETULT &&
  3223. C1 == APInt::getSignedMinValue(OperandBitSize)) {
  3224. SDValue ConstMinusOne =
  3225. DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
  3226. N1.getValueType());
  3227. return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
  3228. }
  3229. }
  3230. }
  3231. // Back to non-vector simplifications.
  3232. // TODO: Can we do these for vector splats?
  3233. if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
  3234. const APInt &C1 = N1C->getAPIntValue();
  3235. // Fold bit comparisons when we can.
  3236. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
  3237. (VT == N0.getValueType() ||
  3238. (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
  3239. N0.getOpcode() == ISD::AND) {
  3240. auto &DL = DAG.getDataLayout();
  3241. if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
  3242. EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
  3243. !DCI.isBeforeLegalize());
  3244. if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
  3245. // Perform the xform if the AND RHS is a single bit.
  3246. if (AndRHS->getAPIntValue().isPowerOf2()) {
  3247. return DAG.getNode(ISD::TRUNCATE, dl, VT,
  3248. DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
  3249. DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
  3250. ShiftTy)));
  3251. }
  3252. } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
  3253. // (X & 8) == 8 --> (X & 8) >> 3
  3254. // Perform the xform if C1 is a single bit.
  3255. if (C1.isPowerOf2()) {
  3256. return DAG.getNode(ISD::TRUNCATE, dl, VT,
  3257. DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
  3258. DAG.getConstant(C1.logBase2(), dl,
  3259. ShiftTy)));
  3260. }
  3261. }
  3262. }
  3263. }
  3264. if (C1.getMinSignedBits() <= 64 &&
  3265. !isLegalICmpImmediate(C1.getSExtValue())) {
  3266. // (X & -256) == 256 -> (X >> 8) == 1
  3267. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
  3268. N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
  3269. if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
  3270. const APInt &AndRHSC = AndRHS->getAPIntValue();
  3271. if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
  3272. unsigned ShiftBits = AndRHSC.countTrailingZeros();
  3273. auto &DL = DAG.getDataLayout();
  3274. EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
  3275. !DCI.isBeforeLegalize());
  3276. EVT CmpTy = N0.getValueType();
  3277. SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
  3278. DAG.getConstant(ShiftBits, dl,
  3279. ShiftTy));
  3280. SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
  3281. return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
  3282. }
  3283. }
  3284. } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
  3285. Cond == ISD::SETULE || Cond == ISD::SETUGT) {
  3286. bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
  3287. // X < 0x100000000 -> (X >> 32) < 1
  3288. // X >= 0x100000000 -> (X >> 32) >= 1
  3289. // X <= 0x0ffffffff -> (X >> 32) < 1
  3290. // X > 0x0ffffffff -> (X >> 32) >= 1
  3291. unsigned ShiftBits;
  3292. APInt NewC = C1;
  3293. ISD::CondCode NewCond = Cond;
  3294. if (AdjOne) {
  3295. ShiftBits = C1.countTrailingOnes();
  3296. NewC = NewC + 1;
  3297. NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
  3298. } else {
  3299. ShiftBits = C1.countTrailingZeros();
  3300. }
  3301. NewC.lshrInPlace(ShiftBits);
  3302. if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
  3303. isLegalICmpImmediate(NewC.getSExtValue())) {
  3304. auto &DL = DAG.getDataLayout();
  3305. EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
  3306. !DCI.isBeforeLegalize());
  3307. EVT CmpTy = N0.getValueType();
  3308. SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
  3309. DAG.getConstant(ShiftBits, dl, ShiftTy));
  3310. SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
  3311. return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
  3312. }
  3313. }
  3314. }
  3315. }
  3316. if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
  3317. auto *CFP = cast<ConstantFPSDNode>(N1);
  3318. assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
  3319. // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
  3320. // constant if knowing that the operand is non-nan is enough. We prefer to
  3321. // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
  3322. // materialize 0.0.
  3323. if (Cond == ISD::SETO || Cond == ISD::SETUO)
  3324. return DAG.getSetCC(dl, VT, N0, N0, Cond);
  3325. // setcc (fneg x), C -> setcc swap(pred) x, -C
  3326. if (N0.getOpcode() == ISD::FNEG) {
  3327. ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
  3328. if (DCI.isBeforeLegalizeOps() ||
  3329. isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
  3330. SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
  3331. return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
  3332. }
  3333. }
  3334. // If the condition is not legal, see if we can find an equivalent one
  3335. // which is legal.
  3336. if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
  3337. // If the comparison was an awkward floating-point == or != and one of
  3338. // the comparison operands is infinity or negative infinity, convert the
  3339. // condition to a less-awkward <= or >=.
  3340. if (CFP->getValueAPF().isInfinity()) {
  3341. if (CFP->getValueAPF().isNegative()) {
  3342. if (Cond == ISD::SETOEQ &&
  3343. isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
  3344. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
  3345. if (Cond == ISD::SETUEQ &&
  3346. isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
  3347. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
  3348. if (Cond == ISD::SETUNE &&
  3349. isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
  3350. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
  3351. if (Cond == ISD::SETONE &&
  3352. isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
  3353. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
  3354. } else {
  3355. if (Cond == ISD::SETOEQ &&
  3356. isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
  3357. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
  3358. if (Cond == ISD::SETUEQ &&
  3359. isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
  3360. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
  3361. if (Cond == ISD::SETUNE &&
  3362. isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
  3363. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
  3364. if (Cond == ISD::SETONE &&
  3365. isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
  3366. return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
  3367. }
  3368. }
  3369. }
  3370. }
  3371. if (N0 == N1) {
  3372. // The sext(setcc()) => setcc() optimization relies on the appropriate
  3373. // constant being emitted.
  3374. assert(!N0.getValueType().isInteger() &&
  3375. "Integer types should be handled by FoldSetCC");
  3376. bool EqTrue = ISD::isTrueWhenEqual(Cond);
  3377. unsigned UOF = ISD::getUnorderedFlavor(Cond);
  3378. if (UOF == 2) // FP operators that are undefined on NaNs.
  3379. return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
  3380. if (UOF == unsigned(EqTrue))
  3381. return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
  3382. // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
  3383. // if it is not already.
  3384. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
  3385. if (NewCond != Cond &&
  3386. (DCI.isBeforeLegalizeOps() ||
  3387. isCondCodeLegal(NewCond, N0.getSimpleValueType())))
  3388. return DAG.getSetCC(dl, VT, N0, N1, NewCond);
  3389. }
  3390. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
  3391. N0.getValueType().isInteger()) {
  3392. if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
  3393. N0.getOpcode() == ISD::XOR) {
  3394. // Simplify (X+Y) == (X+Z) --> Y == Z
  3395. if (N0.getOpcode() == N1.getOpcode()) {
  3396. if (N0.getOperand(0) == N1.getOperand(0))
  3397. return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
  3398. if (N0.getOperand(1) == N1.getOperand(1))
  3399. return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
  3400. if (isCommutativeBinOp(N0.getOpcode())) {
  3401. // If X op Y == Y op X, try other combinations.
  3402. if (N0.getOperand(0) == N1.getOperand(1))
  3403. return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
  3404. Cond);
  3405. if (N0.getOperand(1) == N1.getOperand(0))
  3406. return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
  3407. Cond);
  3408. }
  3409. }
  3410. // If RHS is a legal immediate value for a compare instruction, we need
  3411. // to be careful about increasing register pressure needlessly.
  3412. bool LegalRHSImm = false;
  3413. if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
  3414. if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
  3415. // Turn (X+C1) == C2 --> X == C2-C1
  3416. if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
  3417. return DAG.getSetCC(dl, VT, N0.getOperand(0),
  3418. DAG.getConstant(RHSC->getAPIntValue()-
  3419. LHSR->getAPIntValue(),
  3420. dl, N0.getValueType()), Cond);
  3421. }
  3422. // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
  3423. if (N0.getOpcode() == ISD::XOR)
  3424. // If we know that all of the inverted bits are zero, don't bother
  3425. // performing the inversion.
  3426. if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
  3427. return
  3428. DAG.getSetCC(dl, VT, N0.getOperand(0),
  3429. DAG.getConstant(LHSR->getAPIntValue() ^
  3430. RHSC->getAPIntValue(),
  3431. dl, N0.getValueType()),
  3432. Cond);
  3433. }
  3434. // Turn (C1-X) == C2 --> X == C1-C2
  3435. if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
  3436. if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
  3437. return
  3438. DAG.getSetCC(dl, VT, N0.getOperand(1),
  3439. DAG.getConstant(SUBC->getAPIntValue() -
  3440. RHSC->getAPIntValue(),
  3441. dl, N0.getValueType()),
  3442. Cond);
  3443. }
  3444. }
  3445. // Could RHSC fold directly into a compare?
  3446. if (RHSC->getValueType(0).getSizeInBits() <= 64)
  3447. LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
  3448. }
  3449. // (X+Y) == X --> Y == 0 and similar folds.
  3450. // Don't do this if X is an immediate that can fold into a cmp
  3451. // instruction and X+Y has other uses. It could be an induction variable
  3452. // chain, and the transform would increase register pressure.
  3453. if (!LegalRHSImm || N0.hasOneUse())
  3454. if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
  3455. return V;
  3456. }
  3457. if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
  3458. N1.getOpcode() == ISD::XOR)
  3459. if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
  3460. return V;
  3461. if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
  3462. return V;
  3463. }
  3464. // Fold remainder of division by a constant.
  3465. if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
  3466. N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
  3467. AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  3468. // When division is cheap or optimizing for minimum size,
  3469. // fall through to DIVREM creation by skipping this fold.
  3470. if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
  3471. if (N0.getOpcode() == ISD::UREM) {
  3472. if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
  3473. return Folded;
  3474. } else if (N0.getOpcode() == ISD::SREM) {
  3475. if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
  3476. return Folded;
  3477. }
  3478. }
  3479. }
  3480. // Fold away ALL boolean setcc's.
  3481. if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
  3482. SDValue Temp;
  3483. switch (Cond) {
  3484. default: llvm_unreachable("Unknown integer setcc!");
  3485. case ISD::SETEQ: // X == Y -> ~(X^Y)
  3486. Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
  3487. N0 = DAG.getNOT(dl, Temp, OpVT);
  3488. if (!DCI.isCalledByLegalizer())
  3489. DCI.AddToWorklist(Temp.getNode());
  3490. break;
  3491. case ISD::SETNE: // X != Y --> (X^Y)
  3492. N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
  3493. break;
  3494. case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
  3495. case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
  3496. Temp = DAG.getNOT(dl, N0, OpVT);
  3497. N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
  3498. if (!DCI.isCalledByLegalizer())
  3499. DCI.AddToWorklist(Temp.getNode());
  3500. break;
  3501. case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
  3502. case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
  3503. Temp = DAG.getNOT(dl, N1, OpVT);
  3504. N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
  3505. if (!DCI.isCalledByLegalizer())
  3506. DCI.AddToWorklist(Temp.getNode());
  3507. break;
  3508. case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
  3509. case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
  3510. Temp = DAG.getNOT(dl, N0, OpVT);
  3511. N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
  3512. if (!DCI.isCalledByLegalizer())
  3513. DCI.AddToWorklist(Temp.getNode());
  3514. break;
  3515. case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
  3516. case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
  3517. Temp = DAG.getNOT(dl, N1, OpVT);
  3518. N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
  3519. break;
  3520. }
  3521. if (VT.getScalarType() != MVT::i1) {
  3522. if (!DCI.isCalledByLegalizer())
  3523. DCI.AddToWorklist(N0.getNode());
  3524. // FIXME: If running after legalize, we probably can't do this.
  3525. ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
  3526. N0 = DAG.getNode(ExtendCode, dl, VT, N0);
  3527. }
  3528. return N0;
  3529. }
  3530. // Could not fold it.
  3531. return SDValue();
  3532. }
  3533. /// Returns true (and the GlobalValue and the offset) if the node is a
  3534. /// GlobalAddress + offset.
  3535. bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
  3536. int64_t &Offset) const {
  3537. SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
  3538. if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
  3539. GA = GASD->getGlobal();
  3540. Offset += GASD->getOffset();
  3541. return true;
  3542. }
  3543. if (N->getOpcode() == ISD::ADD) {
  3544. SDValue N1 = N->getOperand(0);
  3545. SDValue N2 = N->getOperand(1);
  3546. if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
  3547. if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
  3548. Offset += V->getSExtValue();
  3549. return true;
  3550. }
  3551. } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
  3552. if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
  3553. Offset += V->getSExtValue();
  3554. return true;
  3555. }
  3556. }
  3557. }
  3558. return false;
  3559. }
/// Target-overridable hook invoked by the DAG combiner. This base
/// implementation performs no combining: it returns an empty SDValue,
/// which signals that the node was not changed.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
  3565. //===----------------------------------------------------------------------===//
  3566. // Inline Assembler Implementation Methods
  3567. //===----------------------------------------------------------------------===//
  3568. TargetLowering::ConstraintType
  3569. TargetLowering::getConstraintType(StringRef Constraint) const {
  3570. unsigned S = Constraint.size();
  3571. if (S == 1) {
  3572. switch (Constraint[0]) {
  3573. default: break;
  3574. case 'r':
  3575. return C_RegisterClass;
  3576. case 'm': // memory
  3577. case 'o': // offsetable
  3578. case 'V': // not offsetable
  3579. return C_Memory;
  3580. case 'n': // Simple Integer
  3581. case 'E': // Floating Point Constant
  3582. case 'F': // Floating Point Constant
  3583. return C_Immediate;
  3584. case 'i': // Simple Integer or Relocatable Constant
  3585. case 's': // Relocatable Constant
  3586. case 'p': // Address.
  3587. case 'X': // Allow ANY value.
  3588. case 'I': // Target registers.
  3589. case 'J':
  3590. case 'K':
  3591. case 'L':
  3592. case 'M':
  3593. case 'N':
  3594. case 'O':
  3595. case 'P':
  3596. case '<':
  3597. case '>':
  3598. return C_Other;
  3599. }
  3600. }
  3601. if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
  3602. if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
  3603. return C_Memory;
  3604. return C_Register;
  3605. }
  3606. return C_Unknown;
  3607. }
  3608. /// Try to replace an X constraint, which matches anything, with another that
  3609. /// has more specific requirements based on the type of the corresponding
  3610. /// operand.
  3611. const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  3612. if (ConstraintVT.isInteger())
  3613. return "r";
  3614. if (ConstraintVT.isFloatingPoint())
  3615. return "f"; // works for many targets
  3616. return nullptr;
  3617. }
/// Target-overridable hook for lowering an inline-asm output operand with a
/// nonstandard constraint. This base implementation handles nothing: it
/// returns an empty SDValue, leaving the output to generic handling.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
    SelectionDAG &DAG) const {
  return SDValue();
}
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {
  // Multi-letter constraints are target-specific; this generic handler only
  // understands the single-letter ones below.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock ||
        Op.getOpcode() == ISD::TargetBlockAddress) {
      Ops.push_back(Op);
      return;
    }
    LLVM_FALLTHROUGH;
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant

    GlobalAddressSDNode *GA;
    ConstantSDNode *C;
    BlockAddressSDNode *BA;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (1) {
      // Global address (rejected for 'n', which requires a plain integer):
      // emit a target global address carrying the accumulated offset.
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0),
                                                 Offset + GA->getOffset()));
        return;
      // Plain constant (rejected for 's', which requires something
      // relocatable).
      } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
                 ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        // i1 values are extended according to the target's boolean-contents
        // policy rather than unconditionally sign-extended.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
                                      : ISD::SIGN_EXTEND;
        int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
                                                    : C->getSExtValue();
        Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
                                            SDLoc(C), MVT::i64));
        return;
      // Block address (also rejected for 'n').
      } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
                 ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetBlockAddress(
            BA->getBlockAddress(), BA->getValueType(0),
            Offset + BA->getOffset(), BA->getTargetFlags()));
        return;
      } else {
        // Not a leaf we can emit directly: peel one ADD/SUB-with-constant
        // layer, fold the constant into Offset, and retry on the other
        // operand.
        const unsigned OpCode = Op.getOpcode();
        if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
          if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
            Op = Op.getOperand(1);
          // Subtraction is not commutative.
          else if (OpCode == ISD::ADD &&
                   (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
            Op = Op.getOperand(0);
          else
            return;
          Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
          continue;
        }
      }
      // Unrecognized operand shape: give up without adding to Ops.
      return;
    }
    break;
  }
  }
}
  3699. std::pair<unsigned, const TargetRegisterClass *>
  3700. TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
  3701. StringRef Constraint,
  3702. MVT VT) const {
  3703. if (Constraint.empty() || Constraint[0] != '{')
  3704. return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
  3705. assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
  3706. // Remove the braces from around the name.
  3707. StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
  3708. std::pair<unsigned, const TargetRegisterClass *> R =
  3709. std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
  3710. // Figure out which register class contains this reg.
  3711. for (const TargetRegisterClass *RC : RI->regclasses()) {
  3712. // If none of the value types for this register class are valid, we
  3713. // can't use it. For example, 64-bit reg classes on 32-bit targets.
  3714. if (!isLegalRC(*RI, *RC))
  3715. continue;
  3716. for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
  3717. I != E; ++I) {
  3718. if (RegName.equals_lower(RI->getRegAsmName(*I))) {
  3719. std::pair<unsigned, const TargetRegisterClass *> S =
  3720. std::make_pair(*I, RC);
  3721. // If this register class has the requested value type, return it,
  3722. // otherwise keep searching and return the first class found
  3723. // if no other is found which explicitly has the requested type.
  3724. if (RI->isTypeLegalForClass(*RC, VT))
  3725. return S;
  3726. if (!R.second)
  3727. R = S;
  3728. }
  3729. }
  3730. }
  3731. return R;
  3732. }
  3733. //===----------------------------------------------------------------------===//
  3734. // Constraint Selection.
/// Return true if this is an input operand that is a matching constraint like
  3736. /// "4".
  3737. bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  3738. assert(!ConstraintCode.empty() && "No known constraint!");
  3739. return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
  3740. }
  3741. /// If this is an input matching constraint, this method returns the output
  3742. /// operand it matches.
  3743. unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  3744. assert(!ConstraintCode.empty() && "No known constraint!");
  3745. return atoi(ConstraintCode.c_str());
  3746. }
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 ImmutableCallSite CS) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
        // Multi-result asm: each output gets the type of its struct element.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are supplied through a pointer; the constraint
        // type is the pointee type.
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        // Only power-of-two sizes up to 128 bits are tiled; anything else
        // keeps ConstraintVT == MVT::Other.
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        // Pointers are modeled as integers of pointer width for their
        // address space.
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One operand cannot satisfy this alternative at all; the whole
            // alternative is invalid.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
           cIndex != eIndex; ++cIndex) {
        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
  3920. /// Return an integer indicating how general CT is.
  3921. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  3922. switch (CT) {
  3923. case TargetLowering::C_Immediate:
  3924. case TargetLowering::C_Other:
  3925. case TargetLowering::C_Unknown:
  3926. return 0;
  3927. case TargetLowering::C_Register:
  3928. return 1;
  3929. case TargetLowering::C_RegisterClass:
  3930. return 2;
  3931. case TargetLowering::C_Memory:
  3932. return 3;
  3933. }
  3934. llvm_unreachable("Invalid constraint type");
  3935. }
  3936. /// Examine constraint type and operand type and determine a weight value.
  3937. /// This object must already have been set up with the operand type
  3938. /// and the current alternative constraint selected.
  3939. TargetLowering::ConstraintWeight
  3940. TargetLowering::getMultipleConstraintMatchWeight(
  3941. AsmOperandInfo &info, int maIndex) const {
  3942. InlineAsm::ConstraintCodeVector *rCodes;
  3943. if (maIndex >= (int)info.multipleAlternatives.size())
  3944. rCodes = &info.Codes;
  3945. else
  3946. rCodes = &info.multipleAlternatives[maIndex].Codes;
  3947. ConstraintWeight BestWeight = CW_Invalid;
  3948. // Loop over the options, keeping track of the most general one.
  3949. for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
  3950. ConstraintWeight weight =
  3951. getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
  3952. if (weight > BestWeight)
  3953. BestWeight = weight;
  3954. }
  3955. return BestWeight;
  3956. }
  3957. /// Examine constraint type and operand type and determine a weight value.
  3958. /// This object must already have been set up with the operand type
  3959. /// and the current alternative constraint selected.
  3960. TargetLowering::ConstraintWeight
  3961. TargetLowering::getSingleConstraintMatchWeight(
  3962. AsmOperandInfo &info, const char *constraint) const {
  3963. ConstraintWeight weight = CW_Invalid;
  3964. Value *CallOperandVal = info.CallOperandVal;
  3965. // If we don't have a value, we can't do a match,
  3966. // but allow it at the lowest weight.
  3967. if (!CallOperandVal)
  3968. return CW_Default;
  3969. // Look at the constraint type.
  3970. switch (*constraint) {
  3971. case 'i': // immediate integer.
  3972. case 'n': // immediate integer with a known value.
  3973. if (isa<ConstantInt>(CallOperandVal))
  3974. weight = CW_Constant;
  3975. break;
  3976. case 's': // non-explicit intregal immediate.
  3977. if (isa<GlobalValue>(CallOperandVal))
  3978. weight = CW_Constant;
  3979. break;
  3980. case 'E': // immediate float if host format.
  3981. case 'F': // immediate float.
  3982. if (isa<ConstantFP>(CallOperandVal))
  3983. weight = CW_Constant;
  3984. break;
  3985. case '<': // memory operand with autodecrement.
  3986. case '>': // memory operand with autoincrement.
  3987. case 'm': // memory operand.
  3988. case 'o': // offsettable memory operand
  3989. case 'V': // non-offsettable memory operand
  3990. weight = CW_Memory;
  3991. break;
  3992. case 'r': // general register.
  3993. case 'g': // general register, memory operand or immediate integer.
  3994. // note: Clang converts "g" to "imr".
  3995. if (CallOperandVal->getType()->isIntegerTy())
  3996. weight = CW_Register;
  3997. break;
  3998. case 'X': // any operand.
  3999. default:
  4000. weight = CW_Default;
  4001. break;
  4002. }
  4003. return weight;
  4004. }
  4005. /// If there are multiple different constraints that we could pick for this
  4006. /// operand (e.g. "imr") try to pick the 'best' one.
  4007. /// This is somewhat tricky: constraints fall into four classes:
  4008. /// Other -> immediates and magic values
  4009. /// Register -> one specific register
  4010. /// RegisterClass -> a group of regs
  4011. /// Memory -> memory
  4012. /// Ideally, we would pick the most specific constraint possible: if we have
  4013. /// something that fits into a register, we would pick it. The problem here
  4014. /// is that if we have something that could either be in a register or in
  4015. /// memory that use of the register could cause selection of *other*
  4016. /// operands to fail: they might only succeed if we pick memory. Because of
  4017. /// this the heuristic we use is:
  4018. ///
  4019. /// 1) If there is an 'other' constraint, and if the operand is valid for
  4020. /// that constraint, use it. This makes us take advantage of 'i'
  4021. /// constraints when available.
  4022. /// 2) Otherwise, pick the most general constraint present. This prefers
  4023. /// 'm' over 'r', for example.
  4024. ///
  4025. static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
  4026. const TargetLowering &TLI,
  4027. SDValue Op, SelectionDAG *DAG) {
  4028. assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  4029. unsigned BestIdx = 0;
  4030. TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  4031. int BestGenerality = -1;
  4032. // Loop over the options, keeping track of the most general one.
  4033. for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
  4034. TargetLowering::ConstraintType CType =
  4035. TLI.getConstraintType(OpInfo.Codes[i]);
  4036. // If this is an 'other' or 'immediate' constraint, see if the operand is
  4037. // valid for it. For example, on X86 we might have an 'rI' constraint. If
  4038. // the operand is an integer in the range [0..31] we want to use I (saving a
  4039. // load of a register), otherwise we must use 'r'.
  4040. if ((CType == TargetLowering::C_Other ||
  4041. CType == TargetLowering::C_Immediate) && Op.getNode()) {
  4042. assert(OpInfo.Codes[i].size() == 1 &&
  4043. "Unhandled multi-letter 'other' constraint");
  4044. std::vector<SDValue> ResultOps;
  4045. TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
  4046. ResultOps, *DAG);
  4047. if (!ResultOps.empty()) {
  4048. BestType = CType;
  4049. BestIdx = i;
  4050. break;
  4051. }
  4052. }
  4053. // Things with matching constraints can only be registers, per gcc
  4054. // documentation. This mainly affects "g" constraints.
  4055. if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
  4056. continue;
  4057. // This constraint letter is more general than the previous one, use it.
  4058. int Generality = getConstraintGenerality(CType);
  4059. if (Generality > BestGenerality) {
  4060. BestType = CType;
  4061. BestIdx = i;
  4062. BestGenerality = Generality;
  4063. }
  4064. }
  4065. OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  4066. OpInfo.ConstraintType = BestType;
  4067. }
  4068. /// Determines the constraint code and constraint type to use for the specific
  4069. /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
  4070. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
  4071. SDValue Op,
  4072. SelectionDAG *DAG) const {
  4073. assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
  4074. // Single-letter constraints ('r') are very common.
  4075. if (OpInfo.Codes.size() == 1) {
  4076. OpInfo.ConstraintCode = OpInfo.Codes[0];
  4077. OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  4078. } else {
  4079. ChooseConstraint(OpInfo, *this, Op, DAG);
  4080. }
  4081. // 'X' matches anything.
  4082. if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
  4083. // Labels and constants are handled elsewhere ('X' is the only thing
  4084. // that matches labels). For Functions, the type here is the type of
  4085. // the result, which is not what we want to look at; leave them alone.
  4086. Value *v = OpInfo.CallOperandVal;
  4087. if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
  4088. OpInfo.CallOperandVal = v;
  4089. return;
  4090. }
  4091. if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
  4092. return;
  4093. // Otherwise, try to resolve it to something we know about by looking at
  4094. // the actual operand type.
  4095. if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
  4096. OpInfo.ConstraintCode = Repl;
  4097. OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  4098. }
  4099. }
  4100. }
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
/// Any newly created nodes are appended to \p Created so the caller can add
/// them to the combiner worklist.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per-element matcher: for each constant divisor, record the shift amount
  // (trailing zeros) and the multiplicative inverse of its odd part.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;
    APInt Divisor = C->getAPIntValue();
    // An exact divide by D0 * 2^K is an arithmetic shift right by K
    // followed by an exact divide by the odd D0.
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (VT.isVector()) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else {
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
  4154. SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
  4155. SelectionDAG &DAG,
  4156. SmallVectorImpl<SDNode *> &Created) const {
  4157. AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  4158. const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  4159. if (TLI.isIntDivCheap(N->getValueType(0), Attr))
  4160. return SDValue(N, 0); // Lower SDIV as SDIV
  4161. return SDValue();
  4162. }
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
/// Newly created nodes are appended to \p Created for the combiner worklist.
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Per-element matcher: compute the magic multiplier, the optional +1/-1
  // numerator correction, the final shift, and a mask that disables the
  // sign-bit fixup for the trivial +1/-1 divisors.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (VT.isVector()) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else {
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
                          : isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
                               : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
    // No MULHS; use the high half of a full multiply instead.
    SDValue LoHi =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
    Q = SDValue(LoHi.getNode(), 1);
  } else
    return SDValue(); // No mulhs or equivalent.
  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
  4257. /// Given an ISD::UDIV node expressing a divide by constant,
  4258. /// return a DAG expression to select that will generate the same value by
  4259. /// multiplying by a magic number.
  4260. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
  4261. SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
  4262. bool IsAfterLegalization,
  4263. SmallVectorImpl<SDNode *> &Created) const {
  4264. SDLoc dl(N);
  4265. EVT VT = N->getValueType(0);
  4266. EVT SVT = VT.getScalarType();
  4267. EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  4268. EVT ShSVT = ShVT.getScalarType();
  4269. unsigned EltBits = VT.getScalarSizeInBits();
  4270. // Check to see if we can do this.
  4271. // FIXME: We should be more aggressive here.
  4272. if (!isTypeLegal(VT))
  4273. return SDValue();
  4274. bool UseNPQ = false;
  4275. SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
  4276. auto BuildUDIVPattern = [&](ConstantSDNode *C) {
  4277. if (C->isNullValue())
  4278. return false;
  4279. // FIXME: We should use a narrower constant when the upper
  4280. // bits are known to be zero.
  4281. APInt Divisor = C->getAPIntValue();
  4282. APInt::mu magics = Divisor.magicu();
  4283. unsigned PreShift = 0, PostShift = 0;
  4284. // If the divisor is even, we can avoid using the expensive fixup by
  4285. // shifting the divided value upfront.
  4286. if (magics.a != 0 && !Divisor[0]) {
  4287. PreShift = Divisor.countTrailingZeros();
  4288. // Get magic number for the shifted divisor.
  4289. magics = Divisor.lshr(PreShift).magicu(PreShift);
  4290. assert(magics.a == 0 && "Should use cheap fixup now");
  4291. }
  4292. APInt Magic = magics.m;
  4293. unsigned SelNPQ;
  4294. if (magics.a == 0 || Divisor.isOneValue()) {
  4295. assert(magics.s < Divisor.getBitWidth() &&
  4296. "We shouldn't generate an undefined shift!");
  4297. PostShift = magics.s;
  4298. SelNPQ = false;
  4299. } else {
  4300. PostShift = magics.s - 1;
  4301. SelNPQ = true;
  4302. }
  4303. PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
  4304. MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
  4305. NPQFactors.push_back(
  4306. DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
  4307. : APInt::getNullValue(EltBits),
  4308. dl, SVT));
  4309. PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
  4310. UseNPQ |= SelNPQ;
  4311. return true;
  4312. };
  4313. SDValue N0 = N->getOperand(0);
  4314. SDValue N1 = N->getOperand(1);
  4315. // Collect the shifts/magic values from each element.
  4316. if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
  4317. return SDValue();
  4318. SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  4319. if (VT.isVector()) {
  4320. PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
  4321. MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
  4322. NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
  4323. PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  4324. } else {
  4325. PreShift = PreShifts[0];
  4326. MagicFactor = MagicFactors[0];
  4327. PostShift = PostShifts[0];
  4328. }
  4329. SDValue Q = N0;
  4330. Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
  4331. Created.push_back(Q.getNode());
  4332. // FIXME: We should support doing a MUL in a wider type.
  4333. auto GetMULHU = [&](SDValue X, SDValue Y) {
  4334. if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
  4335. : isOperationLegalOrCustom(ISD::MULHU, VT))
  4336. return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
  4337. if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
  4338. : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
  4339. SDValue LoHi =
  4340. DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
  4341. return SDValue(LoHi.getNode(), 1);
  4342. }
  4343. return SDValue(); // No mulhu or equivalent
  4344. };
  4345. // Multiply the numerator (operand 0) by the magic value.
  4346. Q = GetMULHU(Q, MagicFactor);
  4347. if (!Q)
  4348. return SDValue();
  4349. Created.push_back(Q.getNode());
  4350. if (UseNPQ) {
  4351. SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
  4352. Created.push_back(NPQ.getNode());
  4353. // For vectors we might have a mix of non-NPQ/NPQ paths, so use
  4354. // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
  4355. if (VT.isVector())
  4356. NPQ = GetMULHU(NPQ, NPQFactor);
  4357. else
  4358. NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
  4359. Created.push_back(NPQ.getNode());
  4360. Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
  4361. Created.push_back(Q.getNode());
  4362. }
  4363. Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
  4364. Created.push_back(Q.getNode());
  4365. SDValue One = DAG.getConstant(1, dl, VT);
  4366. SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
  4367. return DAG.getSelect(dl, VT, IsOne, N0, Q);
  4368. }
  4369. /// If all values in Values that *don't* match the predicate are same 'splat'
  4370. /// value, then replace all values with that splat value.
  4371. /// Else, if AlternativeReplacement was provided, then replace all values that
  4372. /// do match predicate with AlternativeReplacement value.
  4373. static void
  4374. turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
  4375. std::function<bool(SDValue)> Predicate,
  4376. SDValue AlternativeReplacement = SDValue()) {
  4377. SDValue Replacement;
  4378. // Is there a value for which the Predicate does *NOT* match? What is it?
  4379. auto SplatValue = llvm::find_if_not(Values, Predicate);
  4380. if (SplatValue != Values.end()) {
  4381. // Does Values consist only of SplatValue's and values matching Predicate?
  4382. if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
  4383. return Value == *SplatValue || Predicate(Value);
  4384. })) // Then we shall replace values matching predicate with SplatValue.
  4385. Replacement = *SplatValue;
  4386. }
  4387. if (!Replacement) {
  4388. // Oops, we did not find the "baseline" splat value.
  4389. if (!AlternativeReplacement)
  4390. return; // Nothing to do.
  4391. // Let's replace with provided value then.
  4392. Replacement = AlternativeReplacement;
  4393. }
  4394. std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
  4395. }
  4396. /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
  4397. /// where the divisor is constant and the comparison target is zero,
  4398. /// return a DAG expression that will generate the same comparison result
  4399. /// using only multiplications, additions and shifts/rotations.
  4400. /// Ref: "Hacker's Delight" 10-17.
  4401. SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
  4402. SDValue CompTargetNode,
  4403. ISD::CondCode Cond,
  4404. DAGCombinerInfo &DCI,
  4405. const SDLoc &DL) const {
  4406. SmallVector<SDNode *, 2> Built;
  4407. if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
  4408. DCI, DL, Built)) {
  4409. for (SDNode *N : Built)
  4410. DCI.AddToWorklist(N);
  4411. return Folded;
  4412. }
  4413. return SDValue();
  4414. }
/// Implementation detail of buildUREMEqFold: attempt to rewrite
///   (seteq/ne (urem N, D), 0)
/// as
///   (setule/ugt (rotr (mul N, P), K), Q)
/// pushing every newly created node into \p Created. Returns SDValue() when
/// the fold is impossible or deemed unprofitable.
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();
  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isNullValue())
    return SDValue();
  // Per-lane profitability/feasibility facts, accumulated across all
  // divisor elements by the lambda below.
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  // Per-lane magic constants: multiplier P, rotate amount K, threshold Q.
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
  auto BuildUREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;
    const APInt &D = C->getAPIntValue();
    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOneValue();
    AllDivisorsAreOnes &= D.isOneValue();
    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);
    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();
    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
    // Q = floor((2^W - 1) / D)
    APInt Q = APInt::getAllOnesValue(W).udiv(D);
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");
    // If the divisor is 1 the result can be constant-folded.
    if (D.isOneValue()) {
      // Set P and K amount to a bogus values so we can try to splat them.
      P = 0;
      K = -1;
      assert(Q.isAllOnesValue() &&
             "Expecting all-ones comparison for one divisor");
    }
    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };
  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);
  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
    return SDValue();
  // If this is a urem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();
  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();
  SDValue PVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadOneDivisor) {
      // Lanes with divisor 1 got bogus P/K constants above; try to overwrite
      // them with a splat of the other lanes' values so the build_vector
      // stays cheap to materialize.
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }
    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }
  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());
  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    Flags.setExact(true);
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }
  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                      ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
}
  4537. /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
  4538. /// where the divisor is constant and the comparison target is zero,
  4539. /// return a DAG expression that will generate the same comparison result
  4540. /// using only multiplications, additions and shifts/rotations.
  4541. /// Ref: "Hacker's Delight" 10-17.
  4542. SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
  4543. SDValue CompTargetNode,
  4544. ISD::CondCode Cond,
  4545. DAGCombinerInfo &DCI,
  4546. const SDLoc &DL) const {
  4547. SmallVector<SDNode *, 7> Built;
  4548. if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
  4549. DCI, DL, Built)) {
  4550. assert(Built.size() <= 7 && "Max size prediction failed.");
  4551. for (SDNode *N : Built)
  4552. DCI.AddToWorklist(N);
  4553. return Folded;
  4554. }
  4555. return SDValue();
  4556. }
/// Implementation detail of buildSREMEqFold: attempt to rewrite
///   (seteq/ne (srem N, D), 0)
/// as
///   (setule/ugt (rotr (add (mul N, P), A), K), Q)
/// pushing every newly created node into \p Created. Lanes whose divisor is
/// INT_MIN are handled by a separate blend at the end, since the main fold is
/// only valid for positive divisors. Returns SDValue() when the fold is
/// impossible or deemed unprofitable.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  // (seteq/ne (srem N, D), 0)
  // To:
  // (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();
  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isNullValue())
    return SDValue();
  // Per-lane profitability/feasibility facts, accumulated across all
  // divisor elements by the lambda below.
  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  // Per-lane magic constants: multiplier P, offset A, rotate amount K,
  // threshold Q.
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;
    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
    HadIntMinDivisor |= D.isMinSignedValue();
    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOneValue();
    AllDivisorsAreOnes &= D.isOneValue();
    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);
    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }
    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();
    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);
    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }
    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
    assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");
    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOneValue()) {
      // Set P, A and K to a bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;
      // x ?% 1 == 0 <--> true <--> x u<= -1
      Q = -1;
    }
    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };
  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);
  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();
  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();
  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();
  SDValue PVal, AVal, KVal, QVal;
  if (VT.isVector()) {
    if (HadOneDivisor) {
      // Lanes with divisor 1 got bogus P/A/K constants above; try to
      // overwrite them with a splat of the other lanes' values so the
      // build_vector stays cheap to materialize.
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }
    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else {
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }
  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());
  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();
    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }
  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    SDNodeFlags Flags;
    Flags.setExact(true);
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
    Created.push_back(Op0.getNode());
  }
  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;
  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  // All the operations used by the fixup/blend below must be available.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();
  Created.push_back(Fold.getNode());
  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());
  // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());
  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended =
      DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
  return Blended;
}
  4756. bool TargetLowering::
  4757. verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
  4758. if (!isa<ConstantSDNode>(Op.getOperand(0))) {
  4759. DAG.getContext()->emitError("argument to '__builtin_return_address' must "
  4760. "be a constant integer");
  4761. return true;
  4762. }
  4763. return false;
  4764. }
  4765. //===----------------------------------------------------------------------===//
  4766. // Legalization Utilities
  4767. //===----------------------------------------------------------------------===//
/// Expand a MUL/UMUL_LOHI/SMUL_LOHI of type \p VT into operations on the
/// half-width type \p HiLoVT, appending the resulting half-width values to
/// \p Result (lo first). \p LL/LH/RL/RH may optionally provide pre-split
/// halves of the operands; either all four are set or none are. Returns
/// false if no suitable half-width multiply is available.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);
  // Figure out which half-width multiply flavors we may emit. With
  // MulExpansionKind::Always everything is allowed regardless of legality.
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;
  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
  unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
  unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit a half-width multiply producing both lo and hi halves, preferring a
  // single MUL_LOHI node over a MUL + MULH pair.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };
  SDValue Lo, Hi;
  // Split the operands into low halves if the caller did not supply them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }
  if (!LL.getNode())
    return false;
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // For the LOHI forms the upper two halves are known zero.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }
  if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
      RHSSB > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }
  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
    // FIXME getShiftAmountTy does not always return a sensible result when VT
    // is an illegal type, and so the type may be too small to fit the shift
    // amount. Override it with i32. The shift will have to be legalized.
    ShiftAmountTy = MVT::i32;
  }
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
  // Split the operands into high halves if the caller did not supply them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }
  if (!LH.getNode())
    return false;
  // Full schoolbook expansion starts from the low partial product LL*RL.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;
  Result.push_back(Lo);
  if (Opcode == ISD::MUL) {
    // Plain MUL: the hi half only needs the low halves of the cross products.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }
  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };
  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;
  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;
  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  // Prefer the glued ADDC/ADDE carry chain when the target supports it;
  // otherwise use ADDCARRY with an explicit boolean carry.
  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;
  // Propagate the carry from the middle sum into the high partial product.
  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
  if (Opcode == ISD::SMUL_LOHI) {
    // Signed correction: subtract the opposite operand's low half for each
    // negative high half.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
  4921. bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
  4922. SelectionDAG &DAG, MulExpansionKind Kind,
  4923. SDValue LL, SDValue LH, SDValue RL,
  4924. SDValue RH) const {
  4925. SmallVector<SDValue, 2> Result;
  4926. bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
  4927. N->getOperand(0), N->getOperand(1), Result, HiLoVT,
  4928. DAG, Kind, LL, LH, RL, RH);
  4929. if (Ok) {
  4930. assert(Result.size() == 2);
  4931. Lo = Result[0];
  4932. Hi = Result[1];
  4933. }
  4934. return Ok;
  4935. }
/// Expand an ISD::FSHL/FSHR node into shifts, OR, and a select guarding the
/// zero-shift case. On success stores the expansion in \p Result and returns
/// true; returns false if the needed vector operations are unavailable.
bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
                                       SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  // For vectors, all the building blocks must be available.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return false;
  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));
  EVT ShVT = Z.getValueType();
  SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
  SDValue Zero = DAG.getConstant(0, DL, ShVT);
  // Reduce the shift amount modulo the bit width; use a cheap AND when the
  // width is a power of two, otherwise a UREM.
  SDValue ShAmt;
  if (isPowerOf2_32(EltSizeInBits)) {
    SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
    ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
  } else {
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
  }
  SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
  SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
  SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
  // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
  // and that is undefined. We must compare and select to avoid UB.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
  // For fshl, 0-shift returns the 1st arg (X).
  // For fshr, 0-shift returns the 2nd arg (Y).
  SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
  Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
  return true;
}
  4975. // TODO: Merge with expandFunnelShift.
/// Expand an ISD::ROTL/ROTR node: prefer the reverse rotate when legal,
/// otherwise build the classic shift/shift/OR sequence. On success stores the
/// expansion in \p Result and returns true.
bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDLoc DL(SDValue(Node, 0));
  EVT ShVT = Op1.getValueType();
  SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
  // If a rotate in the other direction is legal, use it.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (isOperationLegal(RevRot, VT)) {
    // rot(x, c) == revrot(x, w - c)
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
    Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
    return true;
  }
  // For vectors, all the building blocks of the fallback must be available.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;
  // Otherwise,
  //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
  //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
  //
  assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
         "Expecting the type bitwidth to be a power of 2");
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
  // Masking with w-1 keeps both shift amounts in range (avoids UB for c == 0).
  SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
  SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
  Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
                       DAG.getNode(HsOpc, DL, VT, Op0, And1));
  return true;
}
/// Expand an FP_TO_SINT node by decomposing the float into sign/exponent/
/// mantissa fields with integer operations. Currently limited to f32 -> i64
/// and non-strict nodes; on success stores the expansion in \p Result and
/// returns true.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));
  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;
  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;
  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
  // IEEE-754 single-precision field masks/offsets.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
  // Reinterpret the float bits as an integer to pick the fields apart.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
  // Arithmetic shift of the sign bit yields all-zeros or all-ones.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
  // Reassemble the significand with the implicit leading 1 bit.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));
  R = DAG.getZExtOrTrunc(R, dl, DstVT);
  // Scale the significand by the unbiased exponent: shift left when the
  // exponent exceeds the mantissa width, right otherwise.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);
  // Apply the sign via (R ^ Sign) - Sign (two's-complement negate if Sign
  // is all-ones, no-op if it is zero).
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
  // Negative (unbiased) exponent means |value| < 1, so the result is 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
/// Expand an FP_TO_UINT (or STRICT_FP_TO_UINT) node in terms of FP_TO_SINT.
/// On success, the lowered value is placed in \p Result (and the output chain
/// in \p Chain for strict nodes) and true is returned; false means the
/// expansion is not possible/profitable for this target and value type.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  // Strict FP nodes carry the input chain as operand 0; the FP source follows.
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  // convertFromAPInt reports opOverflow when 2^(DstBits-1) exceeds the FP
  // type's finite range; in that case every in-range FP value also fits in
  // the signed conversion.
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // APF now holds 2^(DstBits-1): the first FP value that no longer fits in
  // the signed destination range.
  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // Val = select Sel, Src, Src - 0x8000000000000000
    // Ofs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Val) ^ Ofs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue SrcBiased;
    if (Node->isStrictFPOpcode())
      SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                              { Node->getOperand(0), Src, Cst });
    else
      SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
    SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
    SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
                                DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      // Chain the conversion after the FSUB so the strict FP ops stay ordered.
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { SrcBiased.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    // XOR-ing in the signmask adds 2^(DstBits-1) back into the biased result.
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
/// Expand UINT_TO_FP from i64 sources into a sequence of signed conversions
/// and FP arithmetic. Only i64 -> f32 and i64 -> f64 are handled; returns
/// false for every other combination (or when the required vector operations
/// are unavailable), leaving \p Result untouched.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  // The expansions below are only written for 64-bit integer sources.
  if (SrcVT.getScalarType() != MVT::i64)
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  if (DstVT.getScalarType() == MVT::f32) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // For unsigned conversions, convert them to signed conversions using the
    // algorithm from the x86_64 __floatundidf in compiler_rt.
    // Fast path: when the top bit is clear, the value fits in a signed i64
    // and a plain SINT_TO_FP is exact.
    SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);

    // Slow path: halve the value (shifting right by one), OR-ing the lost low
    // bit back in ("round to odd") so the final rounding is still correct,
    // convert signed, then double the result.
    SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
    SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
    SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
    SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
    SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);

    SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
    SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);

    // TODO: This really should be implemented using a branch rather than a
    // select. We happen to get lucky and machinesink does the right
    // thing most of the time. This would be a good candidate for a
    // pseudo-op, or, even better, for whole-function isel.
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

    // Signed "< 0" tests exactly the top (unsigned 2^63) bit.
    SDValue SignBitTest = DAG.getSetCC(
        dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
    Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
    return true;
  }

  if (DstVT.getScalarType() == MVT::f64) {
    // Only expand vector types if we have the appropriate vector bit
    // operations.
    if (SrcVT.isVector() &&
        (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
         !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
         !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
         !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
      return false;

    // Implementation of unsigned i64 to f64 following the algorithm in
    // __floatundidf in compiler_rt. This implementation has the advantage
    // of performing rounding correctly, both in the default rounding mode
    // and in all alternate rounding modes.
    // 0x4330000000000000 is the IEEE-754 double 2^52; OR-ing it over the low
    // 32 source bits yields the double 2^52 + lo32 exactly.
    SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
    // 0x4530000000100000 is the double value 2^84 + 2^52, the combined bias
    // subtracted out at the end.
    SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
        BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
    // 0x4530000000000000 is the double 2^84; OR-ing it over the high 32 bits
    // yields 2^84 + hi32 * 2^32 exactly.
    SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
    SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
    SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

    SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
    SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
    SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
    SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
    SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
    SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
    // (HiFlt - (2^84 + 2^52)) + LoFlt == hi32 * 2^32 + lo32 - 2^52 + LoFlt,
    // with the final FADD performing the one (correct) rounding step.
    SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
    Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
    return true;
  }

  return false;
}
  5221. SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
  5222. SelectionDAG &DAG) const {
  5223. SDLoc dl(Node);
  5224. unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
  5225. ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
  5226. EVT VT = Node->getValueType(0);
  5227. if (isOperationLegalOrCustom(NewOp, VT)) {
  5228. SDValue Quiet0 = Node->getOperand(0);
  5229. SDValue Quiet1 = Node->getOperand(1);
  5230. if (!Node->getFlags().hasNoNaNs()) {
  5231. // Insert canonicalizes if it's possible we need to quiet to get correct
  5232. // sNaN behavior.
  5233. if (!DAG.isKnownNeverSNaN(Quiet0)) {
  5234. Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
  5235. Node->getFlags());
  5236. }
  5237. if (!DAG.isKnownNeverSNaN(Quiet1)) {
  5238. Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
  5239. Node->getFlags());
  5240. }
  5241. }
  5242. return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  5243. }
  5244. // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  5245. // instead if there are no NaNs.
  5246. if (Node->getFlags().hasNoNaNs()) {
  5247. unsigned IEEE2018Op =
  5248. Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
  5249. if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
  5250. return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
  5251. Node->getOperand(1), Node->getFlags());
  5252. }
  5253. }
  5254. return SDValue();
  5255. }
/// Expand CTPOP using the parallel bit-counting ("SWAR") algorithm.
/// On success \p Result holds the popcount and true is returned; false means
/// the type is unsupported or the required (vector) operations are missing.
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
                                 SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  // The byte-replicated masks below require a whole number of bytes.
  if (!(Len <= 128 && Len % 8 == 0))
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  // MUL is only needed for the final fold, which is skipped when Len == 8.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // Each mask is the 8-bit pattern replicated across the full width.
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  // After this, each 2-bit field holds the popcount of those two bits.
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  // Sum adjacent 2-bit counts into 4-bit fields.
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  // Sum adjacent 4-bit counts into bytes (max per-byte count is 8, so no
  // carry into the next byte).
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);
  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply sums all byte counts into the top byte; shift it down.
  if (Len > 8)
    Op =
        DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                    DAG.getConstant(Len - 8, dl, ShVT));

  Result = Op;
  return true;
}
  5310. bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
  5311. SelectionDAG &DAG) const {
  5312. SDLoc dl(Node);
  5313. EVT VT = Node->getValueType(0);
  5314. EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  5315. SDValue Op = Node->getOperand(0);
  5316. unsigned NumBitsPerElt = VT.getScalarSizeInBits();
  5317. // If the non-ZERO_UNDEF version is supported we can use that instead.
  5318. if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
  5319. isOperationLegalOrCustom(ISD::CTLZ, VT)) {
  5320. Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
  5321. return true;
  5322. }
  5323. // If the ZERO_UNDEF version is supported use that and handle the zero case.
  5324. if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
  5325. EVT SetCCVT =
  5326. getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  5327. SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
  5328. SDValue Zero = DAG.getConstant(0, dl, VT);
  5329. SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
  5330. Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
  5331. DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
  5332. return true;
  5333. }
  5334. // Only expand vector types if we have the appropriate vector bit operations.
  5335. if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
  5336. !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
  5337. !isOperationLegalOrCustom(ISD::SRL, VT) ||
  5338. !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
  5339. return false;
  5340. // for now, we do this:
  5341. // x = x | (x >> 1);
  5342. // x = x | (x >> 2);
  5343. // ...
  5344. // x = x | (x >>16);
  5345. // x = x | (x >>32); // for 64-bit input
  5346. // return popcount(~x);
  5347. //
  5348. // Ref: "Hacker's Delight" by Henry Warren
  5349. for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
  5350. SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
  5351. Op = DAG.getNode(ISD::OR, dl, VT, Op,
  5352. DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  5353. }
  5354. Op = DAG.getNOT(dl, Op, VT);
  5355. Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
  5356. return true;
  5357. }
/// Expand CTTZ/CTTZ_ZERO_UNDEF using whichever of CTTZ, CTTZ_ZERO_UNDEF,
/// CTPOP, or CTLZ the target supports. Returns false if no expansion is
/// possible for this type.
bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
                                SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT)) {
    Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
    return true;
  }

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
    return true;
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // Either CTPOP or CTLZ is acceptable for the final count (see below).
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return false;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) sets exactly the bits below the lowest set bit of x
  // (and is all-ones when x == 0, giving the full bit width).
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  // NOTE(review): this uses isOperationLegal (not LegalOrCustom), which is
  // deliberately stricter than the vector guard above.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    Result =
        DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                    DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
    return true;
  }

  Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
  return true;
}
  5406. bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
  5407. SelectionDAG &DAG) const {
  5408. SDLoc dl(N);
  5409. EVT VT = N->getValueType(0);
  5410. EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  5411. SDValue Op = N->getOperand(0);
  5412. // Only expand vector types if we have the appropriate vector operations.
  5413. if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
  5414. !isOperationLegalOrCustom(ISD::ADD, VT) ||
  5415. !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
  5416. return false;
  5417. SDValue Shift =
  5418. DAG.getNode(ISD::SRA, dl, VT, Op,
  5419. DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
  5420. SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
  5421. Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
  5422. return true;
  5423. }
  5424. SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
  5425. SelectionDAG &DAG) const {
  5426. SDLoc SL(LD);
  5427. SDValue Chain = LD->getChain();
  5428. SDValue BasePTR = LD->getBasePtr();
  5429. EVT SrcVT = LD->getMemoryVT();
  5430. ISD::LoadExtType ExtType = LD->getExtensionType();
  5431. unsigned NumElem = SrcVT.getVectorNumElements();
  5432. EVT SrcEltVT = SrcVT.getScalarType();
  5433. EVT DstEltVT = LD->getValueType(0).getScalarType();
  5434. unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  5435. assert(SrcEltVT.isByteSized());
  5436. SmallVector<SDValue, 8> Vals;
  5437. SmallVector<SDValue, 8> LoadChains;
  5438. for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
  5439. SDValue ScalarLoad =
  5440. DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
  5441. LD->getPointerInfo().getWithOffset(Idx * Stride),
  5442. SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
  5443. LD->getMemOperand()->getFlags(), LD->getAAInfo());
  5444. BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
  5445. Vals.push_back(ScalarLoad.getValue(0));
  5446. LoadChains.push_back(ScalarLoad.getValue(1));
  5447. }
  5448. SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  5449. SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
  5450. return DAG.getMergeValues({Value, NewChain}, SL);
  5451. }
/// Break a vector store into element-wise scalar stores. Vectors whose
/// element memory type is not byte-sized are instead packed into a single
/// integer and stored with one store, so the in-memory layout stays dense.
/// Returns the resulting chain.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getConstant(Idx, SL, IdxVT));
      // Narrow the element to its memory width, then widen to the packed
      // integer type so it can be shifted into position.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most-significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);

      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getAlignment(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getConstant(Idx, SL, IdxVT));

    SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
        ST->getMemOperand()->getFlags(), ST->getAAInfo());

    Stores.push_back(Store);
  }

  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
/// Expand an unaligned load for a target that cannot perform it natively.
/// FP/vector loads become either a scalarized load, a same-width integer load
/// plus bitcast, or a byte-wise copy through an aligned stack slot; integer
/// loads are split into two half-width loads combined with shift+or.
/// Returns the pair {loaded value, output chain}.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
        if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
          return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
        return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();

    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;
    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    // Note the loop starts at 1, so NumRegs - 1 full copies are emitted and
    // the final (possibly partial) chunk is handled after the loop.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial. Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    // Follow the load with a store to the stack slot. Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
  // The low half is always zero-extended; the high half keeps the original
  // extension kind so the final SHL/OR reconstruction is correct.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  }

  // aggregate the two parts
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  return std::make_pair(Result, TF);
}
  5643. SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
  5644. SelectionDAG &DAG) const {
  5645. assert(ST->getAddressingMode() == ISD::UNINDEXED &&
  5646. "unaligned indexed stores not implemented!");
  5647. SDValue Chain = ST->getChain();
  5648. SDValue Ptr = ST->getBasePtr();
  5649. SDValue Val = ST->getValue();
  5650. EVT VT = Val.getValueType();
  5651. int Alignment = ST->getAlignment();
  5652. auto &MF = DAG.getMachineFunction();
  5653. EVT StoreMemVT = ST->getMemoryVT();
  5654. SDLoc dl(ST);
  5655. if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
  5656. EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
  5657. if (isTypeLegal(intVT)) {
  5658. if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
  5659. StoreMemVT.isVector()) {
  5660. // Scalarize the store and let the individual components be handled.
  5661. SDValue Result = scalarizeVectorStore(ST, DAG);
  5662. return Result;
  5663. }
  5664. // Expand to a bitconvert of the value to the integer type of the
  5665. // same size, then a (misaligned) int store.
  5666. // FIXME: Does not handle truncating floating point stores!
  5667. SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
  5668. Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
  5669. Alignment, ST->getMemOperand()->getFlags());
  5670. return Result;
  5671. }
  5672. // Do a (aligned) store to a stack slot, then copy from the stack slot
  5673. // to the final destination using (unaligned) integer loads and stores.
  5674. MVT RegVT = getRegisterType(
  5675. *DAG.getContext(),
  5676. EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
  5677. EVT PtrVT = Ptr.getValueType();
  5678. unsigned StoredBytes = StoreMemVT.getStoreSize();
  5679. unsigned RegBytes = RegVT.getSizeInBits() / 8;
  5680. unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
  5681. // Make sure the stack slot is also aligned for the register type.
  5682. SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
  5683. auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  5684. // Perform the original store, only redirected to the stack slot.
  5685. SDValue Store = DAG.getTruncStore(
  5686. Chain, dl, Val, StackPtr,
  5687. MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
  5688. EVT StackPtrVT = StackPtr.getValueType();
  5689. SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
  5690. SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
  5691. SmallVector<SDValue, 8> Stores;
  5692. unsigned Offset = 0;
  5693. // Do all but one copies using the full register width.
  5694. for (unsigned i = 1; i < NumRegs; i++) {
  5695. // Load one integer register's worth from the stack slot.
  5696. SDValue Load = DAG.getLoad(
  5697. RegVT, dl, Store, StackPtr,
  5698. MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
  5699. // Store it to the final location. Remember the store.
  5700. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
  5701. ST->getPointerInfo().getWithOffset(Offset),
  5702. MinAlign(ST->getAlignment(), Offset),
  5703. ST->getMemOperand()->getFlags()));
  5704. // Increment the pointers.
  5705. Offset += RegBytes;
  5706. StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
  5707. Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
  5708. }
  5709. // The last store may be partial. Do a truncating store. On big-endian
  5710. // machines this requires an extending load from the stack slot to ensure
  5711. // that the bits are in the right place.
  5712. EVT LoadMemVT =
  5713. EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
  5714. // Load from the stack slot.
  5715. SDValue Load = DAG.getExtLoad(
  5716. ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
  5717. MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
  5718. Stores.push_back(
  5719. DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
  5720. ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
  5721. MinAlign(ST->getAlignment(), Offset),
  5722. ST->getMemOperand()->getFlags(), ST->getAAInfo()));
  5723. // The order of the stores doesn't matter - say it with a TokenFactor.
  5724. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
  5725. return Result;
  5726. }
  5727. assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
  5728. "Unaligned store of unknown type.");
  5729. // Get the half-size VT
  5730. EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  5731. int NumBits = NewStoredVT.getSizeInBits();
  5732. int IncrementSize = NumBits / 8;
  5733. // Divide the stored value in two parts.
  5734. SDValue ShiftAmount = DAG.getConstant(
  5735. NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  5736. SDValue Lo = Val;
  5737. SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
  5738. // Store the two parts
  5739. SDValue Store1, Store2;
  5740. Store1 = DAG.getTruncStore(Chain, dl,
  5741. DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
  5742. Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
  5743. ST->getMemOperand()->getFlags());
  5744. Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
  5745. Alignment = MinAlign(Alignment, IncrementSize);
  5746. Store2 = DAG.getTruncStore(
  5747. Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
  5748. ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
  5749. ST->getMemOperand()->getFlags(), ST->getAAInfo());
  5750. SDValue Result =
  5751. DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  5752. return Result;
  5753. }
  5754. SDValue
  5755. TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
  5756. const SDLoc &DL, EVT DataVT,
  5757. SelectionDAG &DAG,
  5758. bool IsCompressedMemory) const {
  5759. SDValue Increment;
  5760. EVT AddrVT = Addr.getValueType();
  5761. EVT MaskVT = Mask.getValueType();
  5762. assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
  5763. "Incompatible types of Data and Mask");
  5764. if (IsCompressedMemory) {
  5765. // Incrementing the pointer according to number of '1's in the mask.
  5766. EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
  5767. SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
  5768. if (MaskIntVT.getSizeInBits() < 32) {
  5769. MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
  5770. MaskIntVT = MVT::i32;
  5771. }
  5772. // Count '1's with POPCNT.
  5773. Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
  5774. Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
  5775. // Scale is an element size in bytes.
  5776. SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
  5777. AddrVT);
  5778. Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
  5779. } else
  5780. Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
  5781. return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
  5782. }
  5783. static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
  5784. SDValue Idx,
  5785. EVT VecVT,
  5786. const SDLoc &dl) {
  5787. if (isa<ConstantSDNode>(Idx))
  5788. return Idx;
  5789. EVT IdxVT = Idx.getValueType();
  5790. unsigned NElts = VecVT.getVectorNumElements();
  5791. if (isPowerOf2_32(NElts)) {
  5792. APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
  5793. Log2_32(NElts));
  5794. return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
  5795. DAG.getConstant(Imm, dl, IdxVT));
  5796. }
  5797. return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
  5798. DAG.getConstant(NElts - 1, dl, IdxVT));
  5799. }
  5800. SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
  5801. SDValue VecPtr, EVT VecVT,
  5802. SDValue Index) const {
  5803. SDLoc dl(Index);
  5804. // Make sure the index type is big enough to compute in.
  5805. Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
  5806. EVT EltVT = VecVT.getVectorElementType();
  5807. // Calculate the element offset and add it to the pointer.
  5808. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
  5809. assert(EltSize * 8 == EltVT.getSizeInBits() &&
  5810. "Converting bits to bytes lost precision");
  5811. Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
  5812. EVT IdxVT = Index.getValueType();
  5813. Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
  5814. DAG.getConstant(EltSize, dl, IdxVT));
  5815. return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
  5816. }
  5817. //===----------------------------------------------------------------------===//
  5818. // Implementation of Emulated TLS Model
  5819. //===----------------------------------------------------------------------===//
/// Lower a TLS global address under the emulated TLS model.
///
/// Access to the address of TLS variable xyz is lowered to a function call:
///   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
///
/// \returns the pointer result of the lowered __emutls_get_address call.
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  // Build the single argument: the address of the "__emutls_v.<name>"
  // control variable. It is presumably created by an earlier emulated-TLS
  // lowering pass; the assert below requires it to already exist.
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Emit the call chained off the entry node with the C calling convention.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
  5851. SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
  5852. SelectionDAG &DAG) const {
  5853. assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
  5854. if (!isCtlzFast())
  5855. return SDValue();
  5856. ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  5857. SDLoc dl(Op);
  5858. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
  5859. if (C->isNullValue() && CC == ISD::SETEQ) {
  5860. EVT VT = Op.getOperand(0).getValueType();
  5861. SDValue Zext = Op.getOperand(0);
  5862. if (VT.bitsLT(MVT::i32)) {
  5863. VT = MVT::i32;
  5864. Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
  5865. }
  5866. unsigned Log2b = Log2_32(VT.getSizeInBits());
  5867. SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
  5868. SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
  5869. DAG.getConstant(Log2b, dl, MVT::i32));
  5870. return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
  5871. }
  5872. }
  5873. return SDValue();
  5874. }
/// Expand [SU][ADD|SUB]SAT. Unsigned cases prefer a UMAX/UMIN-based
/// sequence when those ops are legal; otherwise the value is computed with
/// the matching [SU][ADD|SUB]O overflow op and the overflowing lanes are
/// replaced with the saturation constant via select.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  // Fallback: compute with the corresponding overflow-reporting op.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
                               LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // Booleans are 0/-1, so sext(Overflow) is already an all-ones mask:
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  } else if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  } else {
    // Signed add/sub: a negative wrapped result means the true result
    // exceeded SatMax, a non-negative one means it went below SatMin.
    // SatMax -> Overflow && SumDiff < 0
    // SatMin -> Overflow && SumDiff >= 0
    APInt MinVal = APInt::getSignedMinValue(BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
    SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
    SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
    SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
    Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
    return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
  }
}
/// Expand a fixed point multiplication ([SU]MULFIX[SAT]) into a wide
/// multiply producing hi/lo halves, a funnel-shift by the scale, and (for
/// the saturating variants) clamps against the saturation bounds.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      // Scale-0 saturating signed multiply via SMULO: on overflow select
      // SatMax/SatMin by the sign of the (wrapped) product.
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      // Scale-0 saturating unsigned multiply via UMULO: clamp to all-ones
      // on overflow.
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    // No way to form the wide product for this vector type; let the caller
    // fall back (e.g. to unrolling).
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.
    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);
    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.
  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    // With Scale == 0 the interesting bits straddle Lo and Hi: the wide
    // product overflowed iff Hi is not simply the sign-extension of Lo.
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  // We handled Scale==0 above so all the bits to examine is in Hi.
  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if (Hi >> (Scale - 1)) < -1),
  // which is the same as if (HI < (-1 << (Scale - 1))
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
  return Result;
}
  6071. void TargetLowering::expandUADDSUBO(
  6072. SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  6073. SDLoc dl(Node);
  6074. SDValue LHS = Node->getOperand(0);
  6075. SDValue RHS = Node->getOperand(1);
  6076. bool IsAdd = Node->getOpcode() == ISD::UADDO;
  6077. // If ADD/SUBCARRY is legal, use that instead.
  6078. unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
  6079. if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
  6080. SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
  6081. SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
  6082. { LHS, RHS, CarryIn });
  6083. Result = SDValue(NodeCarry.getNode(), 0);
  6084. Overflow = SDValue(NodeCarry.getNode(), 1);
  6085. return;
  6086. }
  6087. Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
  6088. LHS.getValueType(), LHS, RHS);
  6089. EVT ResultType = Node->getValueType(1);
  6090. EVT SetCCType = getSetCCResultType(
  6091. DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  6092. ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
  6093. SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
  6094. Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
  6095. }
/// Expand S[ADD|SUB]O into an ADD/SUB plus a sign-comparison based overflow
/// check. If the matching saturating op is legal, overflow is detected more
/// cheaply by comparing the wrapped and saturated results.
void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // LHSSign -> LHS >= 0
  // RHSSign -> RHS >= 0
  // SumSign -> Result >= 0
  //
  // Add:
  // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
  // Sub:
  // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
  SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
  SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
  SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
                                    IsAdd ? ISD::SETEQ : ISD::SETNE);

  SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
  SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);

  SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
  Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
}
/// Expand [SU]MULO into a multiply plus an overflow check, trying in order:
/// a shift-based expansion for power-of-two constants, MULH[SU],
/// [SU]MUL_LOHI, a widened MUL, and finally a MUL_I* libcall with manually
/// split halves. Returns false only when the expansion is impossible
/// (illegal vector with no legal wide ops).
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      // Overflow iff shifting back does not recover the original value.
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  // General case: form the double-width product and inspect the top half.
  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Row 0 is the unsigned op set, row 1 the signed one.
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    // Extend to the wide type, multiply, then split the product.
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
      HiLHS = DAG.getConstant(0, dl, VT);
      HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign-extension of the
    // bottom half.
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    // Unsigned overflow iff the top half is nonzero.
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
  6267. SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  6268. SDLoc dl(Node);
  6269. bool NoNaN = Node->getFlags().hasNoNaNs();
  6270. unsigned BaseOpcode = 0;
  6271. switch (Node->getOpcode()) {
  6272. default: llvm_unreachable("Expected VECREDUCE opcode");
  6273. case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
  6274. case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
  6275. case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break;
  6276. case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
  6277. case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
  6278. case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
  6279. case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
  6280. case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
  6281. case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
  6282. case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
  6283. case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
  6284. case ISD::VECREDUCE_FMAX:
  6285. BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
  6286. break;
  6287. case ISD::VECREDUCE_FMIN:
  6288. BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
  6289. break;
  6290. }
  6291. SDValue Op = Node->getOperand(0);
  6292. EVT VT = Op.getValueType();
  6293. // Try to use a shuffle reduction for power of two vectors.
  6294. if (VT.isPow2VectorType()) {
  6295. while (VT.getVectorNumElements() > 1) {
  6296. EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
  6297. if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
  6298. break;
  6299. SDValue Lo, Hi;
  6300. std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
  6301. Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
  6302. VT = HalfVT;
  6303. }
  6304. }
  6305. EVT EltVT = VT.getVectorElementType();
  6306. unsigned NumElts = VT.getVectorNumElements();
  6307. SmallVector<SDValue, 8> Ops;
  6308. DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
  6309. SDValue Res = Ops[0];
  6310. for (unsigned i = 1; i < NumElts; i++)
  6311. Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
  6312. // Result type may be wider than element type.
  6313. if (EltVT != Node->getValueType(0))
  6314. Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  6315. return Res;
  6316. }