//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
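
// For example, a clause such as num_threads(N + 2) may have its expression
// pre-captured by Sema into an OMPCapturedExprDecl declared in the clause's
// pre-init statement; emitPreInitStmt() above emits that temporary before
// the region body so the outlined code sees an already-evaluated, stable
// value.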
/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
    for (const auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      EmittedAsPrivate.insert(VD->getCanonicalDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    // Mark private vars as undefs.
    for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
      for (const Expr *IRef : C->varlists()) {
        const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
        if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
          (void)PreCondVars.setVarAddr(
              CGF, OrigVD,
              Address(llvm::UndefValue::get(
                          CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
                              OrigVD->getType().getNonReferenceType()))),
                      CGF.getContext().getDeclAlign(OrigVD)));
        }
      }
    }
    (void)PreCondVars.apply(CGF);
    // Emit init, __range and __end variables for C++ range loops.
    const Stmt *Body =
        S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
    for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
      Body = Body->IgnoreContainers();
      if (auto *For = dyn_cast<ForStmt>(Body)) {
        Body = For->getBody();
      } else {
        assert(isa<CXXForRangeStmt>(Body) &&
               "Expected canonical for loop or range-based for loop.");
        auto *CXXFor = cast<CXXForRangeStmt>(Body);
        if (const Stmt *Init = CXXFor->getInit())
          CGF.EmitStmt(Init);
        CGF.EmitStmt(CXXFor->getRangeStmt());
        CGF.EmitStmt(CXXFor->getEndStmt());
        Body = CXXFor->getBody();
      }
    }
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
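
// For instance, with a collapsed nest such as
//
//   #pragma omp for collapse(2)
//   for (auto &V : Vectors)
//     for (int I = 0; I < N; ++I) ...
//
// each collapsed level is unwrapped above; for the range-based level the
// implicit init/__range/__end declarations are emitted eagerly so that the
// directive's precondition and bound expressions can refer to them.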
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress();
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
} // namespace
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}
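
// For example, for a VLA `double A[n][m]` the variable dimensions are folded
// into a single element count (n * m, using NUW multiplies), and the
// returned size is n * m * sizeof(double).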
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);
        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}
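
// Roughly, for an `int X` captured by copy, X's value is stored into a
// uintptr-sized temporary named "X.casted" through a pointer of the source
// type and reloaded as uintptr, since the runtime only forwards
// pointer-sized arguments to the outlined function.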
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();
  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value cast to uintptr.
    // This is necessary because the runtime library only deals with pointers.
    // VLA type sizes are passed to the outlined function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything;
    // just use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
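
// In rough terms, this prologue supports two parameter conventions: with
// UIntPtrCastRequired, by-value captures and VLA sizes arrive widened to
// uintptr and are re-derived into typed locals (LocalAddrs/VLASizes);
// without it, parameters keep their original source types, which is what
// the debug-info path below relies on.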
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
        return LocalAddrPair.second.second;
      });
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
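
// With debug info enabled this produces, roughly, two functions: the body
// function, named "<helper>_debug__" with source-typed parameters so the
// debugger sees natural variables, and a wrapper carrying the plain helper
// name that decodes the uintptr-encoded runtime arguments and forwards them
// via emitOutlinedFunctionCall().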
//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
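
// Schematically, the emitted control flow is:
//
//   entry:              isempty = (dest.begin == dest.end)
//                       br isempty ? omp.arraycpy.done : omp.arraycpy.body
//   omp.arraycpy.body:  PHIs over src/dest elements; CopyGen(dest, src);
//                       advance both by one; br (next == end) ? done : body
//   omp.arraycpy.done: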
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform a simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types, perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap the pseudo source variable to the private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function (e.g. omp for, omp simd, omp distribute).
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables captured by
      // reference in target regions.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
                                                                    OrigVD);
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(), OriginalLVal.getAddress(),
                      Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for a single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VDInit, OriginalAddr, VD]() {
                // Emit the private VarDecl with copy init.
                // Remap the temp VDInit variable to the address of the
                // original variable (for proper handling of captured global
                // variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
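
// As an example, given `int A[8];` and
//
//   #pragma omp parallel firstprivate(A)
//
// the array branch above allocates the private copy and initializes it from
// the original, either with a single aggregate assign for trivial element
// types or via the element-wise EmitOMPAggregateAssign() loop otherwise.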
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
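// Note on EmitOMPPrivateClause (illustrative, not from the original source):
// for
//   #pragma omp parallel private(x)
// each thread gets a fresh local 'x' emitted through EmitDecl; unlike
// firstprivate, no value is copied from the original variable.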
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread.
          // If it is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
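// Note on EmitOMPCopyinClause (illustrative, not from the original source):
// for
//   int tp;
//   #pragma omp threadprivate(tp)
//   #pragma omp parallel copyin(tp)
// each non-master thread compares the address of its threadprivate copy with
// the master's address and, when they differ, copies the master's value into
// its own copy; the caller is expected to emit the synchronizing barrier
// afterwards (see EmitOMPParallelDirective).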
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for the future update at
      // the end of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in
      // the runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                          /*RefersToEnclosingVariableOrCapture=*/
                          CapturedStmtInfo->lookup(OrigVD) != nullptr,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit
        // is not generated; initialization of this variable will happen in
        // the codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
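// Note on EmitOMPLastprivateClauseFinal (illustrative, not from the original
// source): for
//   #pragma omp for lastprivate(x)
// the thread that owns the sequentially last iteration executes the guarded
// block above, first bringing loop counters up to date via their 'final'
// expressions and then copying its private 'x' back to the original variable.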
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(
            GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
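// Note on EmitOMPReductionClauseInit (illustrative, not from the original
// source): for
//   #pragma omp parallel for reduction(+ : sum)
// each thread receives a private 'sum' initialized to the reduction identity
// (0 for '+'); the LHS/RHS helper variables privatized above are later used
// by EmitOMPReductionClauseFinal to combine the per-thread partial results.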
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit a nowait reduction if the nowait clause is present or the
    // directive is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // When the first post-update expression is found, emit the
          // conditional block if one was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs
/// such as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }
  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // on propagation of the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
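// Note on EmitOMPParallelDirective (illustrative, not from the original
// source): for a directive such as
//   #pragma omp parallel copyin(tp) firstprivate(a) reduction(+ : b)
// the lambda above emits the copyin assignments and firstprivate copies, a
// barrier (only when copyin is present) to order them, private/reduction
// initialization, the region body, and finally the reduction combine.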
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update counter values for the current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables. In distribute directives only loop counters
  // may be marked as linear, so there is no need to generate code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }
  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  for (const Expr *E : D.finals_conditions()) {
    if (!E)
      continue;
    // Check that the loop counter in a non-rectangular nest fits into the
    // iteration space.
    llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
    EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
                         getProfileCount(D.getBody()));
    EmitBlock(NextBB);
  }
  // Emit loop variables for C++ range loops.
  const Stmt *Body =
      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
  for (unsigned Cnt = 0; Cnt < D.getCollapsedNumber(); ++Cnt) {
    Body = Body->IgnoreContainers();
    if (auto *For = dyn_cast<ForStmt>(Body)) {
      Body = For->getBody();
    } else {
      assert(isa<CXXForRangeStmt>(Body) &&
             "Expected canonical for loop or range-based for loop.");
      auto *CXXFor = cast<CXXForRangeStmt>(Body);
      EmitStmt(CXXFor->getLoopVarStmt());
      Body = CXXFor->getBody();
    }
  }
  // Emit the loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));
  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);
  incrementProfileCounter(&S);
  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  BodyGen(*this);
  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
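// The control flow emitted by EmitOMPInnerLoop is roughly (illustrative
// sketch based on the block names above):
//   omp.inner.for.cond: branch on LoopCond to omp.inner.for.body or to
//                       omp.inner.for.end (through a cleanup block when
//                       cleanups are required);
//   omp.inner.for.body: BodyGen;
//   omp.inner.for.inc:  IncExpr; PostIncGen; branch back to the cond block;
//   omp.inner.for.end:  fall-through.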
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if one was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
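// Note on the linear clause codegen (illustrative, not from the original
// source): for
//   #pragma omp simd linear(i : 2)
// EmitOMPLinearClauseInit emits the private copy and the pre-calculated
// step, and the 'finals' expressions emitted above store roughly
// 'original + niters * 2' back into the original 'i' after the loop.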
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (const Expr *E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(
            PtrValue, E, /*No second loc needed*/ SourceLocation(), Alignment);
      }
    }
  }
}
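// Note on emitAlignedClause (illustrative, not from the original source): for
//   #pragma omp simd aligned(p : 32)
// the code above emits an alignment assumption telling the optimizer that
// 'p' points to 32-byte-aligned memory; without an explicit alignment, the
// target's default SIMD alignment for the pointee type is used.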
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    } else {
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getCollapsedNumber(),
                  E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that can be captured to avoid
      // re-emission of the variables declared within the loops.
      if (DRE->refersToEnclosingVariableOrCapture()) {
        (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
          return CreateMemTemp(DRE->getType(), VD->getName());
        });
      }
    }
  }
}
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Create temp loop control variables with their init values to support
  // non-rectangular loops.
  CodeGenFunction::OMPMapVars PreCondVars;
  for (const Expr *E : S.dependent_counters()) {
    if (!E)
      continue;
    assert(!E->getType().getNonReferenceType()->isRecordType() &&
           "dependent counter must not be an iterator.");
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Address CounterAddr =
        CGF.CreateMemTemp(VD->getType().getNonReferenceType());
    (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr);
  }
  (void)PreCondVars.apply(CGF);
  for (const Expr *E : S.dependent_inits()) {
    if (!E)
      continue;
    CGF.EmitIgnoredExpr(E);
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
  PreCondVars.restore(CGF);
}
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In presence of finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process simdlen/safelen.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if one was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      } else {
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }
  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }
  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());
  // Emit the iteration count variable.
  // If it is not a variable, Sema decided to calculate the iteration count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }
  CGF.EmitOMPSimdInit(S);
  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
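// Note on emitOMPSimdRegion (illustrative, not from the original source):
// for
//   #pragma omp simd
//   for (int i = 0; i < n; ++i) ...
// the region becomes a single loop guarded by the precondition check and
// annotated with vectorization hints via LoopStack (vectorize enable plus,
// when simdlen/safelen are present, an explicit vectorize width).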
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));
  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }
  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");
  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);
  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);
  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);
  SourceLocation Loc = S.getBeginLoc();
  // When 'distribute' is not combined with a 'for':
  //   while (idx <= UB) { BODY; ++idx; }
  // When 'distribute' is combined with a 'for' (e.g. 'distribute parallel
  // for'):
  //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);
  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");
  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the
  // threads in the team in a round-robin fashion in the order of the thread
  // number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };
  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
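// Note on EmitOMPForOuterLoop (illustrative, not from the original source):
// for
//   #pragma omp for schedule(dynamic, 4)
// DynamicOrOrdered is true, so the bounds go through the dispatch-init
// runtime entry and the outer loop repeatedly asks the runtime for the next
// chunk, while
//   #pragma omp for schedule(static, 4)
// takes the static-init path and strides LB/UB by ST between chunks.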
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /*Ordered=*/false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
  // For combined 'distribute' and 'for', the increment expression of
  // distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();
  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();
  EmitOMPOuterLoop(/*DynamicOrOrdered=*/false, /*IsMonotonic=*/false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  // When composing 'distribute' with 'for' (e.g. as in 'distribute parallel
  // for') we need to use the 'distribute' chunk lower and upper bounds
  // rather than the whole loop iteration space. These are parameters to the
  // outlined function for 'parallel', and we copy the bounds of the previous
  // schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());
  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);
  return {LB, UB};
}
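// Note on emitDistributeParallelForInnerBounds (illustrative, not from the
// original source): in
//   #pragma omp distribute parallel for
// the inner 'for' region must not start from the full [0, LastIteration]
// space; the helper above seeds its LB/UB from the enclosing 'distribute'
// chunk bounds that were passed as parameters to the outlined parallel
// function.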
/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference in the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
  // normalized, as each team only executes its own assigned distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
  llvm::Value *UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
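
// For illustration (an informal sketch): for
//   #pragma omp distribute parallel for
//   for (int i = 0; i < N; ++i) a[i] = b[i];
// the 'distribute' loop above hands each team a chunk, and
// emitInnerParallelForWhenCombined outlines a 'parallel for' over that chunk,
// passing the chunk bounds as the extra captured parameters set up by
// emitDistributeParallelForDistributeInnerBoundParams.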

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit the target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
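
// For illustration (an informal sketch): for
//   #pragma omp target simd
//   for (int i = 0; i < N; ++i) ...
// the simd region becomes the body of the offloaded outlined function emitted
// by the device-function variant above; the host path emits the same CodeGen
// lambda through the common target-directive helper.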

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.
    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress(),
            StaticChunkedOne ? Chunk : nullptr);
        RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        if (!StaticChunkedOne)
          EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // For unchunked static schedule generate:
        //
        //   while (idx <= UB) {
        //     BODY;
        //     ++idx;
        //   }
        //
        // For static schedule with chunk one:
        //
        //   while (IV <= PrevUB) {
        //     BODY;
        //     IV += ST;
        //   }
        EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(),
            StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
            StaticChunkedOne ? S.getDistInc() : S.getInc(),
            [&S, LoopExit](CodeGenFunction &CGF) {
              CGF.EmitOMPLoopBody(S, LoopExit);
              CGF.EmitStopPoint(&S);
            },
            [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
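
// Schematically (an informal sketch, not literal IR), the static branch above
// lowers
//   #pragma omp for schedule(static)
// to runtime calls such as
//   __kmpc_for_static_init_4(&loc, tid, kmp_sch_static, &IL, &LB, &UB, &ST,
//                            /*incr=*/1, /*chunk=*/1);
//   for (IV = LB; IV <= UB; ++IV) { BODY; }
//   __kmpc_for_static_fini(&loc, tid);
// while dispatch schedules instead go through EmitOMPForOuterLoop and the
// __kmpc_dispatch_init/__kmpc_dispatch_next family.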

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
}
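
// For illustration (an informal sketch): for
//   #pragma omp for
//   for (int i = 0; i < N; ++i) ...
// the loop is emitted as an inlined worksharing region followed by an
// implicit barrier; with 'nowait' the barrier is skipped unless lastprivate
// finalization still requires one.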

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getBeginLoc(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getBeginLoc(), true);
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
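
// For illustration (an informal sketch):
//   #pragma omp sections
//   {
//   #pragma omp section
//     foo();
//   #pragma omp section
//     bar();
//   }
// becomes a static worksharing loop over IV in [0, 1] whose body is
//   switch (IV) { case 0: foo(); break; case 1: bar(); break; }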

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid a data race on firstprivate
  // init, and because neither a 'nowait' nor a 'copyprivate' clause removed
  // the need for one).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}
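
// For illustration (an informal sketch):
//   #pragma omp single copyprivate(x)
//   x = compute();
// runs the region on one thread; the copyprivate helper expressions collected
// above let the runtime broadcast that thread's 'x' into every other thread's
// copy before the threads continue.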

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}
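
// For illustration (an informal sketch):
//   #pragma omp critical(name)
//   { ... }
// is bracketed by __kmpc_critical/__kmpc_end_critical on a lock derived from
// 'name'; when a 'hint' clause is present, the Hint expression above selects
// the __kmpc_critical_with_hint entry instead.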

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
          CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
                                 TaskgroupDescriptors[Cnt]->getExprLoc());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
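
// Schematically (an informal sketch, not literal IR), TaskGen ultimately
// materializes the task through the runtime as
//   kmp_task_t *T = __kmpc_omp_task_alloc(&loc, tid, flags, sizeof(task),
//                                         sizeof(shareds), OutlinedFn);
//   ... copy shareds and firstprivates into T ...
//   __kmpc_omp_task(&loc, tid, T);
// with __kmpc_omp_task_with_deps used when 'depend' clauses were recorded in
// Data.Dependences above.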

static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_RValue));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointersType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
          CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
    }
    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}
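
// Note (informal): the synthesized integer IfCond above makes the target task
// deferred only when 'nowait' is present; the constant-0 'if' otherwise routes
// the call through the undeferred task path, so the encountering thread waits
// for the offload region to complete.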

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }
  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}
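
// For illustration (an informal sketch):
//   #pragma omp task untied if(cond) firstprivate(x)
//   { ... }
// sets Data.Tied to false, records 'x' with its initializer as a
// firstprivate, and passes 'cond' as IfCond so the runtime can execute the
// task undeferred when the condition is false.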

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const Expr *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
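
// For illustration (an informal sketch):
//   #pragma omp taskgroup task_reduction(+: sum)
//   { /* child tasks with in_reduction(+: sum) */ }
// allocates a reduction descriptor via emitTaskReductionInit and stores it in
// the implicit reduction-ref variable, which child tasks later use to fetch
// their private copies of 'sum'.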

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc());
}
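
// For illustration (an informal sketch):
//   #pragma omp flush        // or the list form: flush(a, b)
// lowers to a __kmpc_flush(&loc) call; the optional variable list is
// collected above, though the runtime treats the flush as a full memory
// fence regardless.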
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const Expr *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getBeginLoc());
        }
      } else {
        // Default behaviour when no dist_schedule clause is present.
        CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
            *this, S, ScheduleKind, Chunk);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked = RT.isStaticChunked(
          ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress(),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());

        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();

        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For a static chunked schedule generate:
        //
        //  while (IV <= GlobalUB) {
        //    <CodeGen rest of pragma>(LB, UB);
        //    LB += ST;
        //    UB += ST;
        //    UB = min(UB, GlobalUB);
        //    IV = LB;
        //  }
        //
        EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), Cond, IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [&S, StaticChunked](CodeGenFunction &CGF) {
              if (StaticChunked) {
                CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                CGF.EmitIgnoredExpr(S.getCombinedInit());
              }
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }

    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
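// Illustrative note (not part of the original source): for a combined
// construct such as
//
//   #pragma omp distribute parallel for dist_schedule(static, 128)
//   for (int i = 0; i < N; ++i) ...
//
// StaticChunked is true above, so the emitted skeleton recomputes LB, UB,
// and IV after each chunk, whereas a bare '#pragma omp distribute' with an
// unchunked static schedule runs a single 'while (idx <= UB)' loop.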
void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->setDoesNotRecurse();
  return Fn;
}
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.getAssociatedStmt() &&
           "No associated statement must be in ordered depend construct.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
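// Illustrative note (not part of the original source): the three shapes this
// handles are
//
//   #pragma omp ordered depend(sink: i-1)  // doacross; no associated stmt
//   #pragma omp ordered simd               // body outlined, no runtime sync
//   #pragma omp ordered                    // body wrapped in a runtime
//                                          // ordered region (!C is true)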
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}
static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*isInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}
static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}
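// Illustrative note (not part of the original source): this handles
//
//   #pragma omp atomic read seq_cst
//   v = x;
//
// i.e. an atomic load of 'x' followed by a plain store to 'v', with the
// implicit flush emitted only when 'seq_cst' is present.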
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomic is allowed for the given
  // type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
       (Update.getScalarVal()->getType() !=
        X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
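// Illustrative note (not part of the original source): with an integer 'x',
// an update such as
//
//   #pragma omp atomic
//   x += 1;        // lowers to an 'atomicrmw add' via the fast path above
//
// succeeds here, while
//
//   #pragma omp atomic
//   x = 1 - x;     // BO_Sub with 'x' on the RHS; returns {false, ...}
//
// is rejected and falls back to the compare-and-swap loop in
// EmitOMPAtomicSimpleUpdateExpr.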
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  //  x binop= expr;    -> xrval binop expr;
  //  x++, ++x          -> xrval + 1;
  //  x--, --x          -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x;    -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}
static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  //  x binop= expr;    -> xrval binop expr;
  //  x++, ++x          -> xrval + 1;
  //  x--, --x          -> xrval - 1;
  //  x = x binop expr; -> xrval binop expr;
  //  x = expr Op x;    -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}
static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    //  x binop= expr;    -> xrval binop expr;
    //  x++, ++x          -> xrval + 1;
    //  x--, --x          -> xrval - 1;
    //  x = x binop expr; -> xrval binop expr;
    //  x = expr Op x;    -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using
        // the old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
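// Illustrative note (not part of the original source): IsPostfixUpdate
// distinguishes
//
//   #pragma omp atomic capture
//   { v = x; x += 1; }   // postfix: 'v' receives the old value of 'x'
//
// from
//
//   #pragma omp atomic capture
//   { x += 1; v = x; }   // prefix: 'v' receives the updated value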
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_allocator:
  case OMPC_allocate:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
  case OMPC_atomic_default_mem_order:
  case OMPC_device_type:
  case OMPC_match:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
  if (const auto *FE = dyn_cast<FullExpr>(CS))
    enterFullExpression(FE);
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const Stmt *C : Compound->body()) {
      if (const auto *FE = dyn_cast<FullExpr>(C))
        enterFullExpression(FE);
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getBeginLoc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
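// Illustrative note (not part of the original source): the clause scan above
// means a bare '#pragma omp atomic' (Kind stays OMPC_unknown) and
// '#pragma omp atomic update seq_cst' (Kind becomes OMPC_update) both route
// to emitOMPAtomicUpdateExpr, with IsSeqCst controlling the ordering.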
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  auto &&SizeEmitter =
      [IsOffloadEntry](CodeGenFunction &CGF,
                       const OMPLoopDirective &D) -> llvm::Value * {
    if (IsOffloadEntry) {
      OMPLoopScope(CGF, D);
      // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
      return NumIterations;
    }
    return nullptr;
  };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
}
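// Illustrative note (not part of the original source): given
//
//   #pragma omp target if(target: use_gpu) device(dev_id)
//   { ... }
//
// IfCond binds to 'use_gpu' and Device to 'dev_id'; if 'use_gpu' constant
// folds to false, or no offload targets were requested (no
// -fopenmp-targets=... triples), the region is still outlined but not
// registered as an offload entry.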
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Function *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}
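// Illustrative note (not part of the original source): for
//
//   #pragma omp teams num_teams(8) thread_limit(64)
//
// both clause expressions are forwarded to the runtime via
// emitNumTeamsClause before the outlined teams function is invoked.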
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
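// Illustrative note (not part of the original source): in
//
//   #pragma omp parallel
//   { ... #pragma omp cancel parallel ... }
//
// the cancel branches to the function's ReturnBlock, whereas a
// '#pragma omp cancel for' inside a worksharing loop branches to the exit
// block tracked by OMPCancelStack.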
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
                                                         InitAddrIt, InitVD,
                                                         PvtVD]() {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // that the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
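// Illustrative note (not part of the original source): this is reached from
// code such as
//
//   #pragma omp target data map(to: a[0:n]) use_device_ptr(a)
//
// where the privatized copy of 'a' is initialized with the device address
// the runtime handed back in CaptureDeviceAddrMap.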
  4424. // Generate the instructions for '#pragma omp target data' directive.
  4425. void CodeGenFunction::EmitOMPTargetDataDirective(
  4426. const OMPTargetDataDirective &S) {
  4427. CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
  4428. // Create a pre/post action to signal the privatization of the device pointer.
  4429. // This action can be replaced by the OpenMP runtime code generation to
  4430. // deactivate privatization.
  4431. bool PrivatizeDevicePointers = false;
  4432. class DevicePointerPrivActionTy : public PrePostActionTy {
  4433. bool &PrivatizeDevicePointers;
  4434. public:
  4435. explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
  4436. : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
  4437. void Enter(CodeGenFunction &CGF) override {
  4438. PrivatizeDevicePointers = true;
  4439. }
  4440. };
  4441. DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
  4442. auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
  4443. CodeGenFunction &CGF, PrePostActionTy &Action) {
  4444. auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
  4445. CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  4446. };
  4447. // Codegen that selects whether to generate the privatization code or not.
  4448. auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
  4449. &InnermostCodeGen](CodeGenFunction &CGF,
  4450. PrePostActionTy &Action) {
  4451. RegionCodeGenTy RCG(InnermostCodeGen);
  4452. PrivatizeDevicePointers = false;
  4453. // Call the pre-action to change the status of PrivatizeDevicePointers if
  4454. // needed.
  4455. Action.Enter(CGF);
  4456. if (PrivatizeDevicePointers) {
  4457. OMPPrivateScope PrivateScope(CGF);
  4458. // Emit all instances of the use_device_ptr clause.
  4459. for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
  4460. CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
  4461. Info.CaptureDeviceAddrMap);
  4462. (void)PrivateScope.Privatize();
  4463. RCG(CGF);
  4464. } else {
  4465. RCG(CGF);
  4466. }
  4467. };
  4468. // Forward the provided action to the privatization codegen.
  4469. RegionCodeGenTy PrivRCG(PrivCodeGen);
  4470. PrivRCG.setAction(Action);
  4471. // Notwithstanding the body of the region is emitted as inlined directive,
  4472. // we don't use an inline scope as changes in the references inside the
  4473. // region are expected to be visible outside, so we do not privative them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
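
// Illustrative example (not from the original source; names are
// hypothetical): with offload targets configured, the emitter above lowers a
// construct such as
//
//   #pragma omp target data map(tofrom: buf[0:n]) use_device_ptr(buf)
//   { launch_kernel(buf, n); }
//
// Inside the region, 'buf' is privatized to hold the device address recorded
// in Info.CaptureDeviceAddrMap; without offload targets, only the body is
// emitted.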

void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
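
// Illustrative stand-alone directive handled above (hypothetical names):
//
//   #pragma omp target enter data map(to: buf[0:n]) if(n > 0) device(0)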

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
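
// Illustrative counterpart of the enter data example (hypothetical names):
//
//   #pragma omp target exit data map(from: buf[0:n]) device(0)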

static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
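
// Sketch of a construct lowered through the helper above (identifiers are
// illustrative only):
//
//   #pragma omp target parallel firstprivate(a) reduction(+ : sum)
//   { sum += work(a); }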

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a nested 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}
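
// Illustrative construct for the combined emission above (hypothetical
// names):
//
//   #pragma omp target parallel for schedule(static)
//   for (int i = 0; i < n; ++i)
//     out[i] = f(i);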

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' with a nested 'for simd'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}
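
// Illustrative construct for the simd variant above (hypothetical names):
//
//   #pragma omp target parallel for simd simdlen(8)
//   for (int i = 0; i < n; ++i)
//     out[i] = f(i);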

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map a loop helper variable (e.g. a bound, stride, or last-iteration flag)
/// to the matching implicit parameter of the outlined function.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}
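
// Added commentary (hedged): for taskloop-based directives the runtime
// supplies the lower bound, upper bound, stride, and last-iteration flag as
// implicit parameters of the outlined task function; mapParam is the hook
// that redirects each helper variable to its parameter (see the
// LowerBound = 5 enum below for the capture layout).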

void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }
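
  // Added commentary (not in the original source): grainsize stores its
  // operand with IntVal == false and num_tasks stores its operand with
  // IntVal == true, so later runtime emission can tell the two scheduling
  // modes apart from the same pointer/flag pair.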

  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //

    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }

    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);

    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());

    // Emit the iteration count variable.
    // If it is not a variable, Sema decided to calculate the iteration count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iteration count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }

    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}
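
// Illustrative directive handled by the emitter above (hypothetical names);
// 'grainsize' selects the IntVal == false scheduling path and 'nogroup'
// skips the implicit taskgroup:
//
//   #pragma omp taskloop grainsize(64) nogroup
//   for (int i = 0; i < n; ++i)
//     process(i);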

void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
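
// Illustrative construct for the master taskloop emission above (hypothetical
// loop): the taskloop codegen runs inside a master region, so only the master
// thread creates the tasks.
//
//   #pragma omp master taskloop num_tasks(8)
//   for (int i = 0; i < n; ++i)
//     process(i);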

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
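
// Illustrative stand-alone directive lowered by the emitter above
// (hypothetical names):
//
//   #pragma omp target update to(a[0:n]) from(b[0:n]) if(n > 0)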

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      OMPPrivateScope LoopGlobals(CGF);
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            LoopGlobals.addPrivate(
                VD, [&GlobLVal]() { return GlobLVal.getAddress(); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getCollapsedNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      LoopGlobals.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  OMPSimdLexicalScope Scope(*this, D);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this,
      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                  : D.getDirectiveKind(),
      CodeGen);
}
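
// Added commentary (hedged interpretation): this entry point emits only a
// simplified, inlined form of a directive. For simd directives it reuses
// emitOMPSimdRegion; for anything else it privatizes loop counters that live
// in global storage and then emits the captured body directly, without the
// usual runtime calls.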