//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;
  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
  bool hasCancel() const { return HasCancel; }
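  // The region-info hierarchy uses LLVM-style RTTI keyed on CR_OpenMP, so a
  // client holding only a CGCapturedStmtInfo can recover the OpenMP-specific
  // info with dyn_cast. A minimal sketch of a hypothetical use site (not code
  // from this file):
  //
  //   if (auto *RI =
  //           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
  //     if (RI->hasCancel())
  //       ; // emit the cancellation exit for RI->getDirectiveKind()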
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }
  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
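    /// For an untied task, emit the dispatch point at function entry: a
    /// switch over the part id selects where to resume, so each
    /// re-invocation of the task function jumps to the re-entry point saved
    /// at the last scheduling point. A rough sketch of the emitted control
    /// flow (illustration only, not literal IR):
    ///
    ///   switch (*part_id) {          // default: -> .untied.done. -> return
    ///   case 0: goto .untied.jmp.0;  // first execution of the body
    ///   case 1: goto .untied.jmp.1;  // added by emitUntiedSwitch()
    ///   // ... one case per scheduling point
    ///   }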
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
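    /// Emit a task scheduling point: store the index of the next re-entry
    /// point into the part id, run UntiedCodeGen, and return through the
    /// cleanups; a fresh '.untied.jmp.' block is registered as the switch
    /// case that resumes execution right after this point on the next
    /// invocation of the task function.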
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
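    /// Return the number of task parts emitted so far: one switch case per
    /// part (the default destination is not counted). Only meaningful for
    /// untied tasks, where the dispatch switch exists.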
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };

  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
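/// An inlined region has no outlined function of its own: every query below
/// is delegated to the info for the enclosing outlined region (via OldCSI),
/// when one exists.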
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }
  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
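// Illustrative note (not in the original source): the helper name passed in
// by the client is typically of the form
//   __omp_offloading_<device-id>_<file-id>_<parent-function>_l<line>
// so that the host and device sides of a 'target' region agree on a unique
// entry name.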
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }
  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }
  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }
  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }
  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
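// Usage sketch (an assumption, not from the original source): callers that
// emit a directive inline wrap the emission in this RAII so that nested
// lookups are forwarded to the outer region info:
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, Kind, HasCancel);
//     CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
//   }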
/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
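// Illustrative note (not in the original source): these values are combined
// as a bitmask when building the 'flags' field of an ident_t. For instance,
// the implicit barrier at the end of a worksharing loop would be described
// roughly as:
//   OpenMPLocationFlags Flags = OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR;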
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined; the runtime should get it
  /// from the environment variables, per the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
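// Illustrative note (not in the original source): a psource string for a
// construct at test.c:10 inside foo() would look like ";test.c;foo;10;1;;";
// getOrCreateDefaultLocation below falls back to ";unknown;unknown;0;0;;"
// when no location is available.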
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
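// Illustrative note (not in the original source): a clause such as
//   schedule(nonmonotonic: dynamic, 4)
// is lowered, roughly, to the runtime schedule value
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// with the chunk size 4 passed separately to the dispatch-init call.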
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register(ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};
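// Usage sketch (an assumption, not from the original source): these enum
// values are handed to createRuntimeFunction() elsewhere in this file to get
// the callee for a call site, e.g. roughly:
//   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier),
//                       {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)});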
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
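// Illustrative example (not in the original source): given
//   #pragma omp declare reduction(merge : std::vector<int> :
//       omp_out.insert(omp_out.end(), omp_in.begin(), omp_in.end()))
// the ReductionOp of a 'reduction(merge : v)' clause is a call to the UDR
// combiner, and this helper recovers the OMPDeclareReductionDecl for 'merge'.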
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
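// Illustrative note (not in the original source): the IR produced above has
// the shape of a guarded do-while loop, roughly:
//   entry: isempty = (dest.begin == dest.end); br isempty, done, body
//   body:  dest = phi [dest.begin, entry], [dest.next, body]
//          ...initialize one element...
//          dest.next = dest + 1; br (dest.next == dest.end), done, body
//   done: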
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
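// Illustrative note (not in the original source): for a clause such as
//   #pragma omp parallel for reduction(+ : a[1:n])
// the private copy covers only the section a[1]..a[n], while user code in the
// region still indexes from the base of 'a'. The pointer-diff adjustment
// above rebases the private buffer so that 'a[i]' inside the region resolves
// to the matching private element.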
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
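// Illustrative note (not in the original source): the record built above
// lowers to the LLVM IR type
//   %struct.ident_t = type { i32, i32, i32, i32, i8* }
// mirroring the ident_t layout documented before IdentFieldIndex.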
static bool tryEmitAlias(CodeGenModule &CGM, const GlobalDecl &NewGD,
                         const GlobalDecl &OldGD, llvm::GlobalValue *OrigAddr,
                         bool IsForDefinition) {
  // Emit at least a definition for the aliasee if the address of the
  // original function is requested.
  if (IsForDefinition || OrigAddr)
    (void)CGM.GetAddrOfGlobal(NewGD);
  StringRef NewMangledName = CGM.getMangledName(NewGD);
  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
  if (Addr && !Addr->isDeclaration()) {
    const auto *D = cast<FunctionDecl>(OldGD.getDecl());
    const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD);
    llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);

    // Create a reference to the named value. This ensures that it is emitted
    // if a deferred decl.
    llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);

    // Create the new alias itself, but don't set a name yet.
    auto *GA =
        llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());

    if (OrigAddr) {
      assert(OrigAddr->isDeclaration() && "Expected declaration");

      GA->takeName(OrigAddr);
      OrigAddr->replaceAllUsesWith(
          llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
      OrigAddr->eraseFromParent();
    } else {
      GA->setName(CGM.getMangledName(OldGD));
    }

    // Set attributes which are particular to an alias; this is a
    // specialization of the attributes which may be set on a global function.
    if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
        D->isWeakImported())
      GA->setLinkage(llvm::Function::WeakAnyLinkage);

    CGM.SetCommonAttributes(OldGD, GA);
    return true;
  }
  return false;
}
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
  // Emit aliases for the deferred aliasees.
  for (const auto &Pair : DeferredVariantFunction) {
    StringRef MangledName = CGM.getMangledName(Pair.second.second);
    llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
    // If not able to emit alias, just emit original declaration.
    (void)tryEmitAlias(CGM, Pair.second.first, Pair.second.second, Addr,
                       /*IsForDefinition=*/false);
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}
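// Illustrative note (not in the original source): with the host runtime's
// default separators (both "."), getName({"omp", "reduction"}) yields
// ".omp.reduction"; device variants may pass different separators to the
// constructor, which is why the name is assembled this way.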
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill any padding fields the record layout inserted before this field.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}
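
// Worked example (assumption): for a record lowered by the CGRecordLayout as
// { i32, [4 x i8] /* padding */, i8* }, with the two data fields mapped to
// LLVM field indices 0 and 2, the loop above inserts a zeroinitializer for
// the padding slot at index 1, yielding the constant
//
//   { i32 <Data[0]>, [4 x i8] zeroinitializer, i8* <Data[1]> }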

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);
    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
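
// For illustration (assumed typical output, not emitted verbatim): with
// Flags == OMP_IDENT_KMPC this caches a private constant along the lines of
//
//   @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
//   @0 = private unnamed_addr constant %struct.ident_t
//            { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds
//              ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }
//
// and every later request for a default location with the same flags reuses
// the same global.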

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);
  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }
  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
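
// Example (derived from the format built above): for a directive at line 42,
// column 9 of foo.c inside function bar, the psource string stored into the
// ident_t is
//
//   ";foo.c;bar;42;9;;"
//
// which matches the ";file;function;line;column;;" convention used by libomp.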

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid possible
  // crashes.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this is an outlined function with the thread id passed as
        // an argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }
  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc). Generate the thread id value and
  // cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
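
// Sketch of the fallback path's IR (assumed typical output): the call is
// emitted at the service insert point near the function entry so that it
// dominates all uses, e.g.
//
//   entry:
//     %gtid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @0)
//     ...
//     ; all later thread id uses in this function reuse the cached %gtid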

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::FunctionCallee RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_call:
        //  - The callback callee is argument number 2 (microtask).
        //  - The first two arguments of the callback callee are unknown (-1).
        //  - All variadic arguments to the __kmpc_fork_call are passed to the
        //    callback callee.
        F->addMetadata(
            llvm::LLVMContext::MD_callback,
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                        2, {-1, -1},
                                        /* VarArgsArePassed */ true)}));
      }
    }
    break;
  }
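  // The !callback annotation above lets interprocedural passes reason about
  // the microtask argument. Sketch of the resulting IR (assumed typical
  // output):
  //
  //   declare !callback !1 void @__kmpc_fork_call(%struct.ident_t*, i32,
  //                                               void (i32*, i32*, ...)*, ...)
  //   !1 = !{!2}
  //   !2 = !{i64 2, i64 -1, i64 -1, i1 true}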
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    break;
  }
  case OMPRTL__kmpc_critical_with_hint: {
    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit, uintptr_t hint);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
                                CGM.IntPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
    break;
  }
  case OMPRTL__kmpc_threadprivate_register: {
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    // typedef void *(*kmpc_ctor)(void *);
    auto *KmpcCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void *(*kmpc_cctor)(void *, void *);
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *KmpcCopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
                                /*isVarArg*/ false)
            ->getPointerTo();
    // typedef void (*kmpc_dtor)(void *);
    auto *KmpcDtorTy =
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
            ->getPointerTo();
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
                              KmpcCopyCtorTy, KmpcDtorTy};
    auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
                                         /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    break;
  }
  case OMPRTL__kmpc_end_critical: {
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    break;
  }
  case OMPRTL__kmpc_cancel_barrier: {
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    break;
  }
  case OMPRTL__kmpc_barrier: {
    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
    break;
  }
  case OMPRTL__kmpc_for_static_fini: {
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    break;
  }
  case OMPRTL__kmpc_push_num_threads: {
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_threads);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    break;
  }
  case OMPRTL__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_flush: {
    // Build void __kmpc_flush(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
    break;
  }
  case OMPRTL__kmpc_master: {
    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
    break;
  }
  case OMPRTL__kmpc_end_master: {
    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
    break;
  }
  case OMPRTL__kmpc_omp_taskyield: {
    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
    // int end_part);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
    break;
  }
  case OMPRTL__kmpc_single: {
    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
    break;
  }
  case OMPRTL__kmpc_end_single: {
    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
    break;
  }
  case OMPRTL__kmpc_omp_task_alloc: {
    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    // kmp_routine_entry_t *task_entry);
    assert(KmpRoutineEntryPtrTy != nullptr &&
           "Type kmp_routine_entry_t must be created.");
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
    // Return void * and then cast to particular kmp_task_t type.
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
    break;
  }
  case OMPRTL__kmpc_omp_target_task_alloc: {
    // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
    // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
    assert(KmpRoutineEntryPtrTy != nullptr &&
           "Type kmp_routine_entry_t must be created.");
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
                                CGM.Int64Ty};
    // Return void * and then cast to particular kmp_task_t type.
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_target_task_alloc");
    break;
  }
  case OMPRTL__kmpc_omp_task: {
    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
    // *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
    break;
  }
  case OMPRTL__kmpc_copyprivate: {
    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    // kmp_int32 didit);
    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CpyFnTy =
        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    break;
  }
  case OMPRTL__kmpc_reduce: {
    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    break;
  }
  case OMPRTL__kmpc_reduce_nowait: {
    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    // *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_end_reduce: {
    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    break;
  }
  case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
    break;
  }
  case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
    break;
  }
  case OMPRTL__kmpc_ordered: {
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    break;
  }
  case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    break;
  }
  case OMPRTL__kmpc_omp_taskwait: {
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    break;
  }
  case OMPRTL__kmpc_taskgroup: {
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    break;
  }
  case OMPRTL__kmpc_end_taskgroup: {
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    break;
  }
  case OMPRTL__kmpc_push_proc_bind: {
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    // int proc_bind);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    break;
  }
  case OMPRTL__kmpc_omp_task_with_deps: {
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
        CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    break;
  }
  case OMPRTL__kmpc_omp_wait_deps: {
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty,           CGM.VoidPtrTy,
                                CGM.Int32Ty,           CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    break;
  }
  case OMPRTL__kmpc_cancellationpoint: {
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 cncl_kind);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    break;
  }
  case OMPRTL__kmpc_cancel: {
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cncl_kind);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    break;
  }
  case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
    break;
  }
  case OMPRTL__kmpc_fork_teams: {
    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_teams:
        //  - The callback callee is argument number 2 (microtask).
        //  - The first two arguments of the callback callee are unknown (-1).
        //  - All variadic arguments to the __kmpc_fork_teams are passed to the
        //    callback callee.
        F->addMetadata(
            llvm::LLVMContext::MD_callback,
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                        2, {-1, -1},
                                        /* VarArgsArePassed */ true)}));
      }
    }
    break;
  }
  case OMPRTL__kmpc_taskloop: {
    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
    // sched, kmp_uint64 grainsize, void *task_dup);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.IntTy,
                                CGM.VoidPtrTy,
                                CGM.IntTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty,
                                CGM.IntTy,
                                CGM.IntTy,
                                CGM.Int64Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
    break;
  }
  case OMPRTL__kmpc_doacross_init: {
    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
    // num_dims, struct kmp_dim *dims);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.Int32Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
    break;
  }
  case OMPRTL__kmpc_doacross_fini: {
    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
    break;
  }
  case OMPRTL__kmpc_doacross_post: {
    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
    break;
  }
  case OMPRTL__kmpc_doacross_wait: {
    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
    break;
  }
  case OMPRTL__kmpc_task_reduction_init: {
    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
    // *data);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
    break;
  }
  case OMPRTL__kmpc_task_reduction_get_th_data: {
    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
    // *d);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(
        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
    break;
  }
  case OMPRTL__kmpc_alloc: {
    // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
    // al); the omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
    break;
  }
  case OMPRTL__kmpc_free: {
    // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
    // al); the omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
    break;
  }
  case OMPRTL__kmpc_push_target_tripcount: {
    // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
    // size);
    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_target_nowait: {
    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
    break;
  }
  case OMPRTL__tgt_target_teams: {
    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
    break;
  }
  case OMPRTL__tgt_target_teams_nowait: {
    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
    // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
    break;
  }
  case OMPRTL__tgt_register_requires: {
    // Build void __tgt_register_requires(int64_t flags);
    llvm::Type *TypeParams[] = {CGM.Int64Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
    break;
  }
  case OMPRTL__tgt_register_lib: {
    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
    break;
  }
  case OMPRTL__tgt_unregister_lib: {
    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
    break;
  }
  case OMPRTL__tgt_target_data_begin: {
    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
    break;
  }
  case OMPRTL__tgt_target_data_begin_nowait: {
    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_end: {
    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
    break;
  }
  case OMPRTL__tgt_target_data_end_nowait: {
    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_update: {
    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
    break;
  }
  case OMPRTL__tgt_target_data_update_nowait: {
    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
    break;
  }
  case OMPRTL__tgt_mapper_num_components: {
    // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
    llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
    break;
  }
  case OMPRTL__tgt_push_mapper_component: {
    // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
    // *base, void *begin, int64_t size, int64_t type);
    llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
                                CGM.Int64Ty, CGM.Int64Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
    break;
  }
  }
  assert(RTLFn && "Unable to find OpenMP runtime function");
  return RTLFn;
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
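
// Name selection summary (derived from the code above):
//
//   IVSize == 32, signed   -> __kmpc_for_static_init_4
//   IVSize == 32, unsigned -> __kmpc_for_static_init_4u
//   IVSize == 64, signed   -> __kmpc_for_static_init_8
//   IVSize == 64, unsigned -> __kmpc_for_static_init_8u
//
// The dispatch_init/fini/next helpers below follow the same 4/4u/8/8u suffix
// convention.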

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), // loc
                              CGM.Int32Ty,           // tid
                              CGM.Int32Ty,           // schedtype
                              ITy,                   // lower
                              ITy,                   // upper
                              ITy,                   // stride
                              ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();
  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).
  assert(Loc.isValid() && "Source location is expected to be always valid.");
  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();
  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
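
// Example (hypothetical values): for a target region at line 10 of a file
// whose filesystem UniqueID is (device 0x803, inode 0x4f21), this yields
// DeviceID = 0x803, FileID = 0x4f21, LineNum = 10; that triple is what makes
// an offload entry unique across translation units.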

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);
      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
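
// Naming example (derived from the code above): for a variable with mangled
// name "foo" that is not externally visible, in a file with FileID 0x4f21,
// the reference pointer is named "foo_4f21_decl_tgt_ref_ptr"; an externally
// visible "foo" gets simply "foo_decl_tgt_ref_ptr".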

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;
  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}
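
// Sketch of the lowering (assumed typical output): an access to a
// threadprivate variable "x" of type i32 becomes roughly
//
//   %0 = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @loc,
//            i32 %gtid, i8* bitcast (i32* @x to i8*), i64 4,
//            i8*** @x.cache.)
//   %x.addr = bitcast i8* %0 to i32*
//
// where @x.cache. is the per-variable cache created by
// getOrCreateThreadPrivateCache above (the exact cache name is an assumption
// based on getName({"cache", ""})).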

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}
  2588. llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
  2589. const VarDecl *VD, Address VDAddr, SourceLocation Loc,
  2590. bool PerformInit, CodeGenFunction *CGF) {
  2591. if (CGM.getLangOpts().OpenMPUseTLS &&
  2592. CGM.getContext().getTargetInfo().isTLSSupported())
  2593. return nullptr;
  2594. VD = VD->getDefinition(CGM.getContext());
  2595. if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
  2596. QualType ASTTy = VD->getType();
  2597. llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
  2598. const Expr *Init = VD->getAnyInitializer();
  2599. if (CGM.getLangOpts().CPlusPlus && PerformInit) {
  2600. // Generate function that re-emits the declaration's initializer into the
  2601. // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
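      // A rough sketch of the generated helper, assuming the variable has
      // type T:
      //   void __kmpc_global_dtor_(void *ptr) { static_cast<T *>(ptr)->~T(); }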
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;
    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. This parameter is
    // reserved by the runtime, which currently requires it to always be NULL;
    // passing anything else fires an assertion in the runtime.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;
  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }
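  // The resulting prefix has the form
  // "__omp_offloading_<device-id>_<file-id>_<variable-name>_l<line>".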
  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }
    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);
      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
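  // Reuse __kmpc_threadprivate_cached so that each thread lazily gets its own
  // copy of the artificial variable, keyed by the per-variable cache below.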
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getPointerAlign());
}

void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                      const RegionCodeGenTy &ThenGen,
                                      const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }
  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());
    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, as "kmp_int32 *gtid"). Otherwise, if we're in a regular serial
// code region, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash that thread ID in a
// temporary, and return the address of the temp.
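// In the serial case the emitted code is therefore roughly:
//   kmp_int32 .threadid_temp. = __kmpc_global_thread_num(&loc);
//   ... take and return &.threadid_temp. ...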
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
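  // If the variable already exists, check that its type matches the request
  // and reuse it; otherwise create it below with common linkage.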
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier is specified.
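  // For example, under OpenMP >= 5.0 a bare schedule(dynamic) is therefore
  // encoded as OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic.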
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //     kmp_int[32|64] lower, kmp_int[32|64] upper,
  //     kmp_int[32|64] stride, kmp_int[32|64] chunk);
  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);
  // Call __kmpc_for_static_init(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //     kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //     kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //     kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, isOpenMPLoopDirective(DKind)
                                       ? OMP_IDENT_WORK_LOOP
                                       : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //     kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //     kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
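  // Entries are keyed as DeviceID -> FileID -> ParentName -> LineNum.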
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;
  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();
  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");
  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      "__start_omp_offloading_entries");
  HostEntriesBegin->setVisibility(llvm::GlobalValue::HiddenVisibility);
  auto *HostEntriesEnd = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage,
      /*Initializer=*/nullptr, "__stop_omp_offloading_entries");
  HostEntriesEnd->setVisibility(llvm::GlobalValue::HiddenVisibility);
  // Create all device images
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);
  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));
    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }
  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  // This is a Zero array to be used in the creation of the constant
  // expressions.
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};
  // Create the target region descriptor.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.
  3844. llvm::Function *UnRegFn;
  3845. {
  3846. FunctionArgList Args;
  3847. ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
  3848. Args.push_back(&DummyPtr);
  3849. CodeGenFunction CGF(CGM);
  3850. // Disable debug info for global (de-)initializer because they are not part
  3851. // of some particular construct.
  3852. CGF.disableDebugInfo();
  3853. const auto &FI =
  3854. CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  3855. llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
  3856. std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
  3857. UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
  3858. CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
  3859. CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
  3860. Desc);
  3861. CGF.FinishFunction();
  3862. }
  3863. llvm::Function *RegFn;
  3864. {
  3865. CodeGenFunction CGF(CGM);
  3866. // Disable debug info for global (de-)initializer because they are not part
  3867. // of some particular construct.
  3868. CGF.disableDebugInfo();
  3869. const auto &FI = CGM.getTypes().arrangeNullaryFunction();
  3870. llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
  3871. // Encode offload target triples into the registration function name. It
  3872. // will serve as a comdat key for the registration/unregistration code for
  3873. // this particular combination of offloading targets.
  3874. SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
  3875. RegFnNameParts[0] = "omp_offloading";
  3876. RegFnNameParts[1] = "descriptor_reg";
  3877. llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
  3878. [](const llvm::Triple &T) -> const std::string& {
  3879. return T.getTriple();
  3880. });
  3881. llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
  3882. std::string Descriptor = getName(RegFnNameParts);
  3883. RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
  3884. CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
  3885. CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
  3886. // Create a variable to drive the registration and unregistration of the
  3887. // descriptor, so we can reuse the logic that emits Ctors and Dtors.
  3888. ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
  3889. SourceLocation(), nullptr, C.CharTy,
  3890. ImplicitParamDecl::Other);
  3891. CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
  3892. CGF.FinishFunction();
  3893. }
  3894. if (CGM.supportsCOMDAT()) {
  3895. // It is sufficient to call registration function only once, so create a
  3896. // COMDAT group for registration/unregistration functions and associated
  3897. // data. That would reduce startup time and code size. Registration
  3898. // function serves as a COMDAT group key.
  3899. llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
  3900. RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
  3901. RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
  3902. RegFn->setComdat(ComdatKey);
  3903. UnRegFn->setComdat(ComdatKey);
  3904. DeviceImages->setComdat(ComdatKey);
  3905. Desc->setComdat(ComdatKey);
  3906. }
  3907. return RegFn;
  3908. }
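
// For orientation, the globals and helper functions built above amount to
// roughly the following pseudo-C (a sketch only; the actual symbol names are
// produced by getName(), and the registration function name additionally
// encodes the offload target triples):
//
//   __tgt_bin_desc .omp_offloading.descriptor = {
//       <NumDevices>, .omp_offloading.device_images,
//       &__start_omp_offloading_entries, &__stop_omp_offloading_entries};
//   void .omp_offloading.descriptor_unreg(void *) {
//     __tgt_unregister_lib(&.omp_offloading.descriptor);
//   }
//   void .omp_offloading.descriptor_reg() { // emitted as a global initializer
//     __tgt_register_lib(&.omp_offloading.descriptor);
//     // the unregistration function is registered as a global destructor
//   }
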
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
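
// Illustrative result (a sketch with a hypothetical entry named 'foo' on a
// 64-bit target):
//   @.omp_offloading.entry_name = internal unnamed_addr constant [4 x i8] c"foo\00"
//   @.omp_offloading.entry.foo = weak constant %struct.__tgt_offload_entry {
//       i8* <ID>, i8* @.omp_offloading.entry_name, i64 <Size>, i32 <Flags>,
//       i32 0 }, section "omp_offloading_entries"
// The __start/__stop symbols referenced earlier are defined by the linker to
// delimit exactly this section.
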
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };
  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        //   identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;
        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);
        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declare target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
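
// A sample of the metadata produced above (hypothetical values): a target
// region in function 'foo' at line 42 plus a declare target variable 'bar'
// would yield
//   !omp_offload.info = !{!0, !1}
//   !0 = !{i32 0, i32 <device-id>, i32 <file-id>, !"foo", i32 42, i32 0}
//   !1 = !{i32 1, !"bar", i32 <flags>, i32 1}
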
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;
  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }
  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;
  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };
    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
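
// Note: in the usual two-pass host/device compilation flow, the file read here
// is the bitcode produced by the host pass, handed to the device cc1
// invocation via -fopenmp-host-ir-file-path; this function itself only
// consumes LangOpts.OMPHostIRFile.
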
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry {
  //   void *addr;       // Pointer to the offload entry info
  //                     // (function or global).
  //   char *name;       // Name of the function or global.
  //   size_t size;      // Size of the entry info (0 if it is a function).
  //   int32_t flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t reserved; // Reserved, to be used by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image {
  //   void *ImageStart; // Pointer to the target code start.
  //   void *ImageEnd;   // Pointer to the target code end.
  //   // We also add the host entries to the device image, as it may be useful
  //   // for the target runtime to have access to that information.
  //   __tgt_offload_entry *EntriesBegin; // Begin of the table with all
  //                                      // the entries.
  //   __tgt_offload_entry *EntriesEnd;   // End of the table with all the
  //                                      // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc {
  //   int32_t NumDevices;                // Number of devices supported.
  //   __tgt_device_image *DeviceImages;  // Array of device images
  //                                      // (one per device).
  //   __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
  //                                      // entries.
  //   __tgt_offload_entry *EntriesEnd;   // End of the table with all the
  //                                      // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}
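
// Note: __tgt_offload_entry, __tgt_device_image and __tgt_bin_desc are
// expected to mirror the types the offloading runtime (libomptarget) declares;
// a layout change here would have to be matched on the runtime side (stated as
// a constraint for documentation, not enforced in this file).
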
namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}
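
// For example (hypothetical variables), a task with 'firstprivate(A, B)' where
// A is 'int' and B is 'double' gets roughly:
//   struct .kmp_privates.t {
//     double B; // the caller sorts privates by decreasing alignment
//     int A;
//   };
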
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}
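
// The two union fields correspond to the runtime-side kmp_cmplrdata_t, roughly
// (a sketch based on the runtime's kmp.h):
//   typedef union kmp_cmplrdata {
//     kmp_int32 priority;              // priority specified by the user
//     kmp_routine_entry_t destructors; // destructor thunk, if any
//   } kmp_cmplrdata_t;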

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
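
// The generated thunk looks roughly like this (a sketch; the actual name is
// produced by getName(), and one destructor call is pushed per privates field
// whose type needs cleanup):
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     tt->privates.priv1.~T1();
//     ...
//   }
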
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any, otherwise use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
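
// Putting it together, for a plain '#pragma omp task' the code emitted by
// emitTaskInit() boils down to roughly (a sketch with hypothetical sizes):
//   kmp_task_t *new_task = __kmpc_omp_task_alloc(
//       &loc, gtid, flags, sizeof(kmp_task_t_with_privates), sizeof(shareds),
//       &.omp_task_entry.);
//   memcpy(new_task->shareds, <captured shareds>, sizeof(shareds));
//   // then privates are initialized and the destructor thunk and priority,
//   // if needed, are stored into the kmp_cmplrdata_t fields
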
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy {
      DepIn = 0x01,
      DepInOut = 0x3,
      DepMutexInOutSet = 0x4
    };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }
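  // Each kmp_depend_info entry now holds {base_addr, len, flags}; e.g. a
  // 'depend(inout : a[0:n])' item is encoded (illustratively) as the
  // section's start address, its size in bytes, and flags 0x3 (DepInOut).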
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
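
// Illustrative source-level view of the taskloop lowering below: for
//   #pragma omp taskloop grainsize(G) ...
// the lower bound, upper bound and stride captured from the loop directive
// are written into the kmp_task_t and handed to __kmpc_taskloop together with
// a schedule encoding (0 = no schedule clause, 1 = grainsize, 2 = num_tasks)
// and the optional task duplication function.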
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform the reduction element by element.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();
  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
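
// The loop emitted above has roughly this shape (illustrative IR skeleton):
//   entry:              %isempty = icmp eq %lhs.begin, %lhs.end
//                       br %isempty, %omp.arraycpy.done, %omp.arraycpy.body
//   omp.arraycpy.body:  phis for the src/dst element, combine one element,
//                       advance both pointers by one, compare against
//                       %lhs.end and branch back or exit
//   omp.arraycpy.done: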

/// Emit reduction combiner. If the combiner is a simple expression, emit it
/// as is; otherwise treat it as the combiner of a UDR decl and emit it as a
/// call to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}
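
// Illustrative case for the UDR path above: given
//   #pragma omp declare reduction(myop : T : omp_out = foo(omp_out, omp_in))
// the ReductionOp is a CallExpr whose callee is an OpaqueValueExpr; the
// OpaqueValueMapping rebinds that callee to the emitted UDR combiner function
// before the call expression is emitted.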

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();
  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
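
// The generated function matches the reduce_func callback expected by
// __kmpc_reduce{_nowait}: it takes two void* arrays of length <n>, where slot
// i holds the address of reduction item i (for variably-modified items, the
// following slot carries the element count, as decoded above).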

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;
  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;
  // The following code is emitted for a reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //       ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                               *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
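  // A worked instance of the scheme above (illustrative): for
  //   #pragma omp parallel for reduction(+ : s)
  // with a double 's', reduce_func effectively performs
  //   *(double *)lhs[0] += *(double *)rhs[0];
  // and case 2 falls back to an atomic add on the original 's'.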
  ASTContext &C = CGM.getContext();
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }
  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }
  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);
  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);
  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);
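  // Per the libomp contract, __kmpc_reduce{_nowait} returns 1 when the
  // calling thread must perform the reduction itself (case 1 below), 2 when
  // it must reduce via atomics (case 2), and 0 when it has nothing to do.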
  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
  // 6. Build case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);
  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);
  CGF.EmitBranch(DefaultBB);
  // 7. Build case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(VD, [&CGF, VD, XRValue, Loc]() {
                  Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                  CGF.emitOMPSimpleStore(
                      CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                      VD->getType().getNonReferenceType(), Loc);
                  return LHSTemp;
                });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }
  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return Out.str();
}
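
// For example (assuming the host name separator '.'), a local variable 'x'
// whose begin location encodes to 12345, combined with Prefix
// "reduction_size", yields a name of the form "reduction_size.x_12345".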

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If the initializer uses the initializer from a declare reduction
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;
  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. This is required because the runtime
    // provides no way to pass the sizes of VLAs/array sections to the
    // initializer/combiner/finalizer functions and does not pass the pointer
    // to the original reduction item to the initializer. Instead,
    // threadprivate global variables are used to store these values, and the
    // functions read them from there.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
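
// The pointer returned by __kmpc_task_reduction_init identifies the
// taskgroup's reduction data; it is the 'tg' argument later passed to
// __kmpc_task_reduction_get_th_data in getTaskReductionItem below.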
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store the address of the original reduction item if a custom initializer
  // is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
  6250. void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
  6251. const OMPExecutableDirective &D, StringRef ParentName,
  6252. llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
  6253. bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  6254. // Create a unique name for the entry function using the source location
  6255. // information of the current target region. The name will be something like:
  6256. //
  6257. // __omp_offloading_DD_FFFF_PP_lBB
  6258. //
  6259. // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  6260. // mangled name of the function that encloses the target region and BB is the
  6261. // line number of the target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so the compiler
  // doesn't need to keep it alive, and can therefore inline the host function
  // if proven worthwhile during optimization. On the other hand, if emitting
  // code for the device, the ID has to be the function address so that it can
  // be retrieved from the offloading entry and launched by the runtime
  // library. We also mark the outlined function to have external linkage in
  // case we are emitting code for the device, because these functions will be
  // entry points to the device.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
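
/// Returns the single statement nested inside \a Body once containers and
/// statements with no observable effect are discarded, or nullptr if there is
/// more than one such statement. For example (illustrative), for a body of
/// the form
///   { ; int trivial = 0; #pragma omp teams ... }
/// the teams directive is returned, because null statements and trivial
/// declarations are ignored below.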
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
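///
/// For example (illustrative), '#pragma omp target teams num_teams(8)' yields
/// the value 8, while '#pragma omp target parallel' yields the constant 1.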
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle the if clause. If the if clause is present, the number of
      // threads is calculated as
      // <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
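      // For example (illustrative), for a nested
      //   #pragma omp parallel if(c) num_threads(n)
      // the emitted value is c ? n : 1, with n additionally clamped by the
      // default thread limit when one is provided.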
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause iff the if clause was not
      // specified or does not evaluate to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
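///
/// For example (illustrative), for
///   #pragma omp target parallel thread_limit(tl) num_threads(nt)
/// the emitted value is min(nt, tl), using an unsigned comparison.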
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If the if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
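    // For example (illustrative), with 'if(target parallel: c)' where c is
    // statically false, the region runs with a single thread and the
    // constant 1 is returned below.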
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
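//
// For example (illustrative), for 'map(tofrom: a[0:N])' on an array 'a', this
// class produces the base pointer (&a), the section pointer (&a[0]), the size
// in bytes (N * sizeof(a[0])) and the map type flags
// (OMP_MAP_TARGET_PARAM | OMP_MAP_TO | OMP_MAP_FROM).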
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
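  /// With OMP_MAP_MEMBER_OF == 0xffff000000000000 this evaluates to 48, the
  /// number of trailing zero bits.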
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Information about a mappable expression component list: its map type and
  /// modifiers, and whether a device pointer has to be returned for it.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and
  /// there is no map information about it, then emission of that entry is
  /// deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base type) - lb * sizeof(element type);
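      // For example (illustrative), for 'int a[10]' the section 'a[3:]' has
      // size 10*4 - 3*4 = 28 bytes, clamped to 0 below if lb is past the end.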
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
        != MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
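  ///
  /// For example (illustrative), 'a[2:1]' has a provable length of one and is
  /// not final, whereas 'a[2:n]' with a non-constant 'n' is final.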
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //     MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
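    // For example (illustrative), Position 0 (the first argument) yields
    // MEMBER_OF(1), i.e. 0x0001000000000000.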
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }
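
  /// Fill \a Layout with the fields of \a RD and of all its non-empty bases,
  /// flattened recursively in LLVM record-layout order. Bitfields and
  /// zero-size fields are skipped.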
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
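  /// For instance (illustrative), for
  /// \code
  /// struct S { int a; int b; int c; } s;
  /// #pragma omp target map(tofrom: s.a, s.c)
  /// \endcode
  /// a single combined entry with base &s, pointer &s.a, and size
  /// (&s.c + 1) - &s.a is emitted, and the individual entries for s.a and
  /// s.c are marked MEMBER_OF it.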
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
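    // For instance (illustrative), given
    //   int *p;
    //   #pragma omp target data map(tofrom: p[0:N]) use_device_ptr(p)
    // the existing map entry for 'p' is simply marked to also return the
    // translated device pointer.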
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Generate all the base pointers, section pointers, sizes and map types
  /// for the extracted map clauses of a user-defined mapper.
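  /// For example (illustrative), this processes the map clauses of
  /// \code
  /// #pragma omp declare mapper(id : struct S s) map(s.len) map(s.buf[0:s.len])
  /// \endcode
  /// when the body of the generated mapper function is emitted.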
  void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                                MapValuesArrayTy &Pointers,
                                MapValuesArrayTy &Sizes,
                                MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      for (const auto &L : MC->component_lists()) {
        InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, MC->isImplicit());
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
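  /// For example (illustrative), given
  /// \code
  /// int x;
  /// auto L = [&x]() { x++; };
  /// #pragma omp target
  /// L();
  /// \endcode
  /// an extra PTR_AND_OBJ entry is emitted for the capture field of 'L' so
  /// that, on the device, it points at the mapped copy of 'x'.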
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarLValVal.getPointer());
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
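  /// For instance (illustrative), with
  /// \code
  /// #pragma omp target map(to: s.a) map(from: s)
  /// \endcode
  /// the two component lists for 's' overlap; the entry for 's' is split
  /// around the bytes of 's.a' so each region keeps its own map type.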
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");
    // We need to know when we are generating information for the first
    // component associated with a capture.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one of the two lists, we
        // reached the head of its components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }
            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;
            // A list with fewer elements is ordered before a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // The mapping flags depend on whether this is the first component list
    // generated for the capture.
    // First, go through all of the elements that have overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
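  /// For example (illustrative):
  /// \code
  /// int g;
  /// #pragma omp declare target link(g)
  /// ...
  /// #pragma omp target map(tofrom: g)
  /// \endcode
  /// 'g' is not a captured variable, so its map entry is generated here.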
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
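  /// For instance (illustrative), a scalar captured by copy, as 'n' in
  /// \code
  /// int n = 10;
  /// #pragma omp target
  /// use(n);
  /// \endcode
  /// becomes a LITERAL | TARGET_PARAM | IMPLICIT argument with the size of
  /// 'int'.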
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime those captures that are passed by
        // value and are not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
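/// For a region with two captures the emitted arrays look like (illustrative
/// pseudo-code):
/// \code
/// void *.offload_baseptrs[2], *.offload_ptrs[2]; // filled per capture below
/// int64_t .offload_sizes[2]; // local array only if a size needs runtime
///                            // evaluation; otherwise a constant global
/// // .offload_maptypes is always emitted as a constant global.
/// \endcode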
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Check for inner distribute directive.
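/// For example (illustrative), for
/// \code
/// #pragma omp target
/// #pragma omp teams
/// #pragma omp distribute parallel for
/// for (...) ;
/// \endcode
/// the nested 'distribute parallel for' directive is returned.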
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.
  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";
  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, "omp.array" + Prefix + ".delete");
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, "omp.array" + Prefix + ".delete");
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
}
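
// Illustrative (hypothetical) usage of the two paths above: for a 'struct S'
// type with a user-defined mapper, '#pragma omp target enter data
// map(alloc : s[0:n])' reaches the ".init" path (OMP_MAP_DELETE not set),
// while '#pragma omp target exit data map(delete : s[0:n])' reaches the
// ".del" path.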
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *Args[] = {DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
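
// For example, for '#pragma omp target teams distribute parallel for', the
// trip count of the nested distribute loop is evaluated on the host and
// handed to the runtime via __kmpc_push_target_tripcount before the
// offloading call, so the runtime can use it when choosing launch parameters.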
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(OutlinedFn && "Invalid outlined function!");
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.
    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }
    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
    // Return value of the runtime offloading call.
    llvm::Value *Return;
    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //   'target teams'
    //   'target' / 'teams'
    //   'target teams distribute parallel for'
    //   'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
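    // For example (a sketch): '#pragma omp target teams num_teams(4)
    // thread_limit(64)' results in a __tgt_target_teams(...) call carrying
    // NumTeams == 4 and NumThreads == 64, whereas a bare '#pragma omp target'
    // with no enclosed teams region goes through plain __tgt_target(...).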
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }
    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);
    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };
  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;
    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;
      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);
      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);
    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };
  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
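
// Conceptually, the offload sequence emitted above looks like this
// (a simplified sketch; the real IR also carries the offloading arrays):
//   %ret = call i32 @__tgt_target[_teams](...)
//   %failed = icmp ne i32 %ret, 0
//   br i1 %failed, label %omp_offload.failed, label %omp_offload.cont
// omp_offload.failed:                      ; host fallback
//   call void @outlined.host.fn(...)
//   br label %omp_offload.cont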
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;
  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());
  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);
    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;
    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }
  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();
  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
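
// For instance, scanning 'void foo() { #pragma omp target { ... } }' with
// ParentName set to the mangled name of 'foo' emits the device function for
// the target region, which is keyed by the (DeviceID, FileID, ParentName,
// Line) tuple computed above.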
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }
  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  StringRef Name = CGM.getMangledName(GD);
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }
  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetFunctions.count(Name) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }
  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}
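
// E.g., a firstprivate constant 'x' used at line 12 yields an internal global
// named roughly "__omp_offloading_firstprivate_<devid>_<fileid>_x_l12" (the
// device and file IDs rendered in hex), registered as a 'to' entry above.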
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;
  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be 'link' or 'to' with unified "
           "shared memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);
  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::checkArchForUnifiedAddressing(
    const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      break;
    }
  }
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;
  StringRef Name = CGM.getMangledName(GD);
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }
  return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;
  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();
  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least one target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
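
// For example, a TU containing '#pragma omp requires unified_shared_memory'
// plus at least one target region gets a global initialization function
// (name built from "omp_offloading" and "requires_reg") that calls
// __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY).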
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();
  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
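
// E.g., with two captured variables the emitted call is conceptually
// __kmpc_fork_teams(&loc, 2, microtask, var1, var2), mirroring the
// __kmpc_fork_call sequence used for 'parallel' regions.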
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);
  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
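
// For '#pragma omp teams num_teams(8) thread_limit(128)' this pushes 8 and
// 128; an absent clause contributes 0, leaving the choice to the runtime.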
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;
  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;
  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;
    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }
    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);
    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };
  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }
    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };
  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };
  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }
  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
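
// The emitted structure for '#pragma omp target data' is conceptually:
//   __tgt_target_data_begin(device_id, n, bases, ptrs, sizes, map_types);
//   <region body>   // duplicated with/without privatization when device
//                   // pointers are captured via use_device_ptr
//   __tgt_target_data_end(device_id, n, bases, ptrs, sizes, map_types);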
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;
  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }
    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};
    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 Registers and
  // the Stack Frame of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}
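
// Worked example: for 'double f(double x)' with no 'simdlen' clause, the CDT
// is 'double' (the return type), so this returns 64; on an AVX target with
// 256-bit vector registers the resulting VLEN is 256 / 64 = 4.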
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (!!ParamAttr.StrideOrArg)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
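
// For example, '#pragma omp declare simd notinbranch' on 'double foo(double)'
// produces, among others, the attributes "_ZGVbN2v_foo" (SSE: 128/64 = 2
// lanes) and "_ZGVeN8v_foo" (AVX512: 512/64 = 8 lanes), following the
// ISA/mask/VLEN/parameter mangling scheme built above.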
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();
  if (QT->isVoidType())
    return false;
  if (Kind == ParamKindTy::Uniform)
    return false;
  if (Kind == ParamKindTy::Linear)
    return false;
  // TODO: Handle linear references with modifiers.
  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;
  return true;
}
/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);
  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;
  if (QT->isFloatingType())
    return true;
  if (QT->isIntegerType())
    return true;
  if (QT->isPointerType())
    return true;
  // TODO: Add support for complex types (section 3.1.2, item 2).
  return false;
}
/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);
  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
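
// Illustrative sketch (an assumption for exposition, not upstream text): for
// a scalar signature such as
//   double foo(float X, short Y);
// the lane sizes are LS(double) = 64, LS(float) = 32 and LS(short) = 16, so
// getNDSWDS returns (NDS, WDS) = (16, 64), with OutputBecomesInput false
// because a double return value is PBV.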
/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }
  return Out.str();
}
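
// Illustrative sketch (an assumption for exposition, not upstream text): for
// three parameters classified as uniform(U), linear(L:2) and vector V with
// aligned(V:16), this routine produces "ul2va16", which the callers below
// splice between the <vlen> token and the scalar function name.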
// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
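
// Illustrative sketch (an assumption for exposition, not upstream text): a
// call such as
//   addAArch64VectorName("x", "M", "_ZGV", 's', "v", "foo", false, Fn);
// attaches the attribute "_ZGVsMxv_foo", i.e. the scalable masked SVE
// variant of foo with a single vector parameter.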
// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
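
// Illustrative note (an assumption for exposition, not upstream text): the
// pairs above correspond to filling a 64-bit and a 128-bit Advanced SIMD
// register with NDS-bit lanes, e.g. NDS == 32 yields VLEN 2 (one 64-bit
// register) and VLEN 4 (one 128-bit register), while the wider element
// types get a single VLEN of 2.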
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";

  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
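
// Illustrative sketch (an assumption for exposition, not upstream text): for
//   #pragma omp declare simd notinbranch
//   float bar(float X);
// on a NEON-capable target (ISA 'n', NDS == 32, no user simdlen), the calls
// above emit the two unmasked variants "_ZGVnN2v_bar" and "_ZGVnN4v_bar".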
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Use dyn_cast here: a plain cast would assert instead of
            // falling through when the step is not a parameter reference.
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
          CGM.getTriple().getArch() == llvm::Triple::x86_64) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
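
// Illustrative sketch (an assumption for exposition, not upstream text):
// given
//   #pragma omp declare simd uniform(Y) linear(X:1) simdlen(8)
//   int add(int *X, int Y);
// this routine collects ParamAttrs = {Linear with stride 1, Uniform} and
// VLENVal = 8, then dispatches to the x86 or AArch64 mangler above
// depending on the target triple.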
namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
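
// Illustrative sketch (an assumption for exposition, not upstream text): for
// a single-loop `#pragma omp for ordered(1)` this lowers to roughly
//   %dims = alloca [1 x %struct.kmp_dim]
//   ; dims[0].up = <trip count>, dims[0].st = 1
//   call void @__kmpc_doacross_init(..., i32 %gtid, i32 1, i8* %dims)
// with a matching @__kmpc_doacross_fini pushed as a cleanup for region exit.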
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
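
// Illustrative note (an assumption for exposition, not upstream text):
//   #pragma omp ordered depend(source)      -> __kmpc_doacross_post(&vec)
//   #pragma omp ordered depend(sink: i - 1) -> __kmpc_doacross_wait(&vec)
// where vec is the kmp_int64 iteration vector materialized in .cnt.addr.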
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
namespace {
/// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
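
// Illustrative sketch (an assumption for exposition, not upstream text): for
//   int V;
//   #pragma omp allocate(V) allocator(omp_high_bw_mem_alloc)
// a use of V inside the function becomes roughly
//   %v.void.addr = call i8* @__kmpc_alloc(i32 %gtid, i64 4, i8* %allocator)
//   %v.addr = bitcast i8* %v.void.addr to i32*
// with @__kmpc_free(%gtid, %v.void.addr, %allocator) run on scope exit.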
/// Checks current context and returns true if it matches the context selector.
template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet,
          OMPDeclareVariantAttr::CtxSelectorType Ctx>
static bool checkContext(const OMPDeclareVariantAttr *A) {
  assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown &&
         Ctx != OMPDeclareVariantAttr::CtxUnknown &&
         "Unknown context selector or context selector set.");
  return false;
}

/// Checks for implementation={vendor(<vendor>)} context selector.
/// \returns true iff <vendor>="llvm", false otherwise.
template <>
bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
                  OMPDeclareVariantAttr::CtxVendor>(
    const OMPDeclareVariantAttr *A) {
  return !A->getImplVendor().compare("llvm");
}

static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) {
  // If both scores are unknown, choose the very first one.
  if (!LHS && !RHS)
    return true;
  // If only one score is known, prefer the side that has it.
  if (LHS && !RHS)
    return true;
  if (!LHS && RHS)
    return false;
  llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx);
  llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx);
  return llvm::APSInt::compareValues(LHSVal, RHSVal) <= 0;
}
namespace {
/// Comparator for the priority queue for context selector.
class OMPDeclareVariantAttrComparer
    : public std::greater<const OMPDeclareVariantAttr *> {
private:
  ASTContext &Ctx;

public:
  OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {}
  bool operator()(const OMPDeclareVariantAttr *LHS,
                  const OMPDeclareVariantAttr *RHS) const {
    const Expr *LHSExpr = nullptr;
    const Expr *RHSExpr = nullptr;
    if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
      LHSExpr = LHS->getScore();
    if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
      RHSExpr = RHS->getScore();
    return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
  }
};
} // anonymous namespace
/// Finds the variant function that matches current context with its context
/// selector.
static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx,
                                                     const FunctionDecl *FD) {
  if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
    return FD;
  // Iterate through all DeclareVariant attributes and check context selectors.
  auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS,
                           const OMPDeclareVariantAttr *RHS) {
    const Expr *LHSExpr = nullptr;
    const Expr *RHSExpr = nullptr;
    if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
      LHSExpr = LHS->getScore();
    if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
      RHSExpr = RHS->getScore();
    return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
  };
  const OMPDeclareVariantAttr *TopMostAttr = nullptr;
  for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
    const OMPDeclareVariantAttr *SelectedAttr = nullptr;
    switch (A->getCtxSelectorSet()) {
    case OMPDeclareVariantAttr::CtxSetImplementation:
      switch (A->getCtxSelector()) {
      case OMPDeclareVariantAttr::CtxVendor:
        if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
                         OMPDeclareVariantAttr::CtxVendor>(A))
          SelectedAttr = A;
        break;
      case OMPDeclareVariantAttr::CtxUnknown:
        llvm_unreachable(
            "Unknown context selector in implementation selector set.");
      }
      break;
    case OMPDeclareVariantAttr::CtxSetUnknown:
      llvm_unreachable("Unknown context selector set.");
    }
    // If the attribute matches the context, find the attribute with the
    // highest score.
    if (SelectedAttr && (!TopMostAttr || Comparer(TopMostAttr, SelectedAttr)))
      TopMostAttr = SelectedAttr;
  }
  if (!TopMostAttr)
    return FD;
  return cast<FunctionDecl>(
      cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
          ->getDecl());
}
bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
  const auto *D = cast<FunctionDecl>(GD.getDecl());

  // If the original function is defined already, use its definition.
  StringRef MangledName = CGM.getMangledName(GD);
  llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
  if (Orig && !Orig->isDeclaration())
    return false;
  const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D);
  // Emit the original function if it does not have the declare variant
  // attribute or the context does not match.
  if (NewFD == D)
    return false;
  GlobalDecl NewGD = GD.getWithDecl(NewFD);
  if (tryEmitAlias(CGM, NewGD, GD, Orig, IsForDefinition)) {
    DeferredVariantFunction.erase(D);
    return true;
  }
  DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
  return true;
}
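
// Illustrative sketch (an assumption for exposition, not upstream text):
// given
//   void fast_impl(void);
//   #pragma omp declare variant(fast_impl) \
//       match(implementation={vendor(llvm)})
//   void base(void);
// getDeclareVariantFunction selects fast_impl for an "llvm" vendor context,
// and this routine then emits base as an alias of fast_impl, deferring the
// pair if the alias cannot be created yet.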
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}