//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/IR/CallSite.h"

using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const llvm::Optional<OpenMPDirectiveKind> CapturedRegion = llvm::None,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion.hasValue())
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
    }
    (void)InlinedShareds.Privatize();
  }
};
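
// Illustrative note (an assumption for exposition, not upstream text): for a
// directive such as
//   #pragma omp parallel num_threads(NumT + 1)
// Sema may attach a pre-init declaration that captures 'NumT + 1' into a
// helper variable. emitPreInitStmt() above emits such helpers before the
// region body, so each captured clause expression is evaluated exactly once,
// outside the outlined function.
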
/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/llvm::None,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPMapVars PreCondVars;
    for (const auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      (void)PreCondVars.setVarAddr(
          CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
    }
    (void)PreCondVars.apply(CGF);
    if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
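
// Illustrative example (an assumption, not from the original source): for
//   #pragma omp for
//   for (int i = Lo; i < Up; ++i) ...
// S.getPreInits() typically holds helper declarations capturing 'Lo' and 'Up',
// and S.counters() names 'i'. OMPLoopScope gives each counter temporary
// storage first so the pre-init expressions can be emitted before the real
// privatized counters exist.
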
class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (const auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
            return CGF.EmitLValue(&DRE).getAddress();
          });
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};
} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}
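
// Worked example (illustrative) for the VLA path above: given
//   void foo(int n, int m) { double a[n][m]; ... }
// getTypeSize(typeof(a)) peels both VariableArrayType levels and emits
// roughly
//   %0 = mul nuw i64 %n, %m
//   %1 = mul nuw i64 %0, 8        ; 8 == sizeof(double)
// i.e. a chain of NUW multiplies over the run-time extents, times the size of
// the innermost element type.
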
void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());
      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);
        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}
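
// Rough shape of the by-copy cast above (illustrative; IR names assumed):
// when a 32-bit 'int x' is captured by copy for the runtime, codegen produces
//   %x.casted = alloca i64                          ; uintptr-sized slot
//   store i32 %x, i32* <%x.casted viewed as i32*>   ; store via source type
//   %arg = load i64, i64* %x.casted                 ; reload as uintptr
// because the runtime entry points traffic only in pointer-sized arguments.
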
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();
  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();
  // If we are dealing with references, we need to return the address of the
  // reference instead of the reference to the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    llvm::Value *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name, ".ref"));
    LValue TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit=*/true);
  }
  return TmpAddr;
}
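
// Note (descriptive of the code above): this is the callee-side inverse of
// the ".casted" round-trip in GenerateOpenMPCapturedVars. The uintptr
// argument's storage is reinterpreted as 'DstType', and for reference
// captures an extra "<Name>.ref" temporary is materialized to hold the
// address, since callers expect the address of the reference rather than the
// referenced value.
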
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}
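
// Example (illustrative, my reading of the recursion above): a parameter of
// type 'int (*)[n]' (pointer to a VLA) loses the variable bound and becomes
// 'int *', so the outlined function's prototype never mentions run-time array
// bounds.
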
namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;

  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), Ctx.VoidTy,
        Ctx.getTrivialTypeSourceInfo(
            Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
        SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. The VLA type sizes are passed to the outlined
    // function in the same way.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var, FO.UIntPtrCastRequired
                     ? castValueFromUintptr(CGF, I->getLocation(),
                                            FD->getType(), Args[Cnt]->getName(),
                                            ArgLVal, VarTy->isReferenceType())
                     : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
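
// Sketch of a resulting prologue (illustrative; the parameter names are the
// usual conventions, not guaranteed by this code): for '#pragma omp parallel'
// capturing 'int x' by copy plus a VLA 'double a[n]', the outlined function
// looks roughly like
//   define internal void @.omp_outlined.(i32* %.global_tid.,
//                                        i32* %.bound_tid.,
//                                        i64 %x, i64 %vla, double* %a)
// where %x and %vla travel as uintptr values and are cast back to their
// original types in the body.
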
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}
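
// Summary of the wrapper scheme above (descriptive): with debug info at
// LimitedDebugInfo or higher, two functions are emitted:
//   <helper>_debug__ : the real body, using the original parameter types so
//                      debuggers see naturally typed variables;
//   <helper>         : a thin wrapper with the uintptr-based runtime ABI that
//                      loads each argument and forwards to <helper>_debug__.
// Without debug info, only the single uintptr-ABI function is emitted.
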
//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//

void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
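
// Shape of the emitted copy loop (illustrative):
//   entry:             %isempty = icmp eq %dest.begin, %dest.end
//                      br %isempty, %omp.arraycpy.done, %omp.arraycpy.body
//   omp.arraycpy.body: phis for the current src/dest element pointers;
//                      <CopyGen(dest, src)>; advance both by one element;
//                      br (next == %dest.end), done, body
//   omp.arraycpy.done:
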
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() { return DestElement; });
            Remap.addPrivate(SrcVD, [SrcElement]() { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
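
// Example (illustrative): for an array of a trivially copyable type such as
// 'int arr[4]', the copy expression is a plain BO_Assign, so a single
// aggregate copy suffices. For an array of a class type whose copy is an
// 'operator=' call, the element-by-element path runs the copy expression once
// per element with DestVD/SrcVD remapped to the current pair of elements.
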
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal = EmitLValue(&DRE);
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VD, Type, OriginalLVal, VDInit]() {
                AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
                const Expr *Init = VD->getInit();
                if (!isa<CXXConstructExpr>(Init) ||
                    isTrivialInitializer(Init)) {
                  // Perform simple memcpy.
                  LValue Dest =
                      MakeAddrLValue(Emission.getAllocatedAddress(), Type);
                  EmitAggregateAssign(Dest, OriginalLVal, Type);
                } else {
                  EmitOMPAggregateAssign(
                      Emission.getAllocatedAddress(), OriginalLVal.getAddress(),
                      Type,
                      [this, VDInit, Init](Address DestElement,
                                           Address SrcElement) {
                        // Clean up any temporaries needed by the
                        // initialization.
                        RunCleanupsScope InitScope(*this);
                        // Emit initialization for single element.
                        setAddrOfLocalVar(VDInit, SrcElement);
                        EmitAnyExprToMem(Init, DestElement,
                                         Init->getType().getQualifiers(),
                                         /*IsInitializer*/ false);
                        LocalDeclMap.erase(VDInit);
                      });
                }
                EmitAutoVarCleanups(Emission);
                return Emission.getAllocatedAddress();
              });
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          IsRegistered = PrivateScope.addPrivate(
              OrigVD, [this, VDInit, OriginalAddr, VD]() {
                // Emit private VarDecl with copy init.
                // Remap temp VDInit variable to the address of the original
                // variable (for proper handling of captured global variables).
                setAddrOfLocalVar(VDInit, OriginalAddr);
                EmitDecl(*VD);
                LocalDeclMap.erase(VDInit);
                return GetAddrOfLocalVar(VD);
              });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
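
// Example (illustrative):
//   int a[2][3];
//   #pragma omp parallel firstprivate(a)
// takes the array branch above: a private alloca for 'a' is created and
// either filled by a single aggregate assign (trivial element type, as here)
// or initialized element by element via EmitOMPAggregateAssign when the
// element type needs a real copy constructor.
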
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
          // Emit private VarDecl with copy init.
          EmitDecl(*VD);
          return GetAddrOfLocalVar(VD);
        });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
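
// Example (illustrative): for
//   #pragma omp parallel private(x)
// the loop above simply emits the private copy's VarDecl (left uninitialized
// for scalars, default-constructed for class types) and registers its address
// in the private scope; the original 'x' is never read.
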
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread; if it
          // is, no data needs to be copied.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
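
// Illustrative sketch (editorial example, not from the original source): the
// user code shape EmitOMPCopyinClause above lowers. With 'copyin', each
// thread's instance of a threadprivate variable is initialized from the
// master thread's instance on entry to the parallel region, followed by an
// implicit barrier:
//
//   int counter = 0;
//   #pragma omp threadprivate(counter)
//   void f() {
//     counter = 42;                        // master thread's copy
//     #pragma omp parallel copyin(counter)
//     { /* every thread starts with counter == 42 */ }
//   }
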
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
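
// Illustrative sketch (editorial example, not from the original source): the
// 'lastprivate' pattern handled by the init/final pair above and below. Each
// thread works on a private copy; after the loop, the value from the
// sequentially last iteration is copied back to the original variable:
//
//   int last = -1;
//   #pragma omp parallel for lastprivate(last)
//   for (int i = 0; i < n; ++i)
//     last = i;            // after the loop, last == n - 1
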
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD]() { return GetAddrOfLocalVar(PrivateVD); });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
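
// Illustrative sketch (editorial example, not from the original source): a
// 'reduction' clause for which EmitOMPReductionClauseInit above sets up the
// private copy and the LHS/RHS helper variables used by the combiner:
//
//   int sum = 0;
//   #pragma omp parallel for reduction(+ : sum)
//   for (int i = 0; i < n; ++i)
//     sum += a[i];   // each thread accumulates into its private 'sum';
//                    // the copies are combined at the end of the region
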
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      ReductionKind == OMPD_simd;
    bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit nowait reduction if the nowait clause is present or the directive
    // is a parallel directive (it always has an implicit barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}

static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(CGF)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs such
/// as 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace

static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  llvm::Value *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    llvm::Value *NumThreads =
        CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                           /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getBeginLoc());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }
  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
                                              CapturedVars, IfCond);
}

static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}

void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
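
// Illustrative sketch (editorial example, not from the original source):
// conceptually, a directive such as
//
//   #pragma omp parallel num_threads(4) firstprivate(x) reduction(+ : s)
//   { ... }
//
// is lowered by EmitOMPParallelDirective above to an outlined function plus a
// runtime fork call, roughly:
//
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_fork_call(&loc, /*argc=*/..., omp_outlined, /*captured vars...*/);
//
// with the clause codegen above running inside the outlined body.
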
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counter values on the current iteration.
  for (const Expr *UE : D.updates())
    EmitIgnoredExpr(UE);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, so
  // there is no need to generate the code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (const Expr *UE : C->updates())
        EmitIgnoredExpr(UE);
    }
  }
  // On a continue in the body, jump to the end.
  JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}

void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));
  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
  llvm::BasicBlock *LoopBody = createBasicBlock("omp.inner.for.body");
  // Emit condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);
  incrementProfileCounter(&S);
  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  BodyGen(*this);
  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
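
// Illustrative sketch (editorial example, not from the original source): the
// control-flow graph EmitOMPInnerLoop above produces, using the basic-block
// names created in its body:
//
//   omp.inner.for.cond:                 ; loop condition test
//     br i1 %cond, label %omp.inner.for.body, label %omp.inner.for.end
//   omp.inner.for.body:                 ; BodyGen output
//     br label %omp.inner.for.inc
//   omp.inner.for.inc:                  ; IV = IV + 1, PostIncGen output
//     br label %omp.inner.for.cond
//   omp.inner.for.end:                  ; fall-through after the loop
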
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Init : C->inits()) {
      HasLinears = true;
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (const auto *Ref =
              dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else {
        EmitVarDecl(*VD);
      }
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
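
// Illustrative sketch (editorial example, not from the original source): a
// 'linear' clause whose init and step EmitOMPLinearClauseInit above emits.
// The listed variable advances by the step on each iteration and receives its
// final value after the loop:
//
//   int j = 0;
//   #pragma omp simd linear(j : 2)
//   for (int i = 0; i < n; ++i)
//     use(j);   // j == 2 * i within iteration i; j == 2 * n afterwards
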
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (const Expr *F : C->finals()) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (const Expr *E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}
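
// Illustrative sketch (editorial example, not from the original source): an
// 'aligned' clause that emitAlignedClause above turns into an alignment
// assumption (via llvm.assume) on the pointer, letting the vectorizer use
// aligned loads and stores:
//
//   float *p = ...;   // asserted by the programmer to be 32-byte aligned
//   #pragma omp simd aligned(p : 32)
//   for (int i = 0; i < n; ++i)
//     p[i] *= 2.0f;
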
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (const Expr *E : S.counters()) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    // Emit var without initialization.
    AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
    EmitAutoVarCleanups(VarEmission);
    LocalDeclMap.erase(PrivateVD);
    (void)LoopScope.addPrivate(VD, [&VarEmission]() {
      return VarEmission.getAllocatedAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    } else {
      (void)LoopScope.addPrivate(PrivateVD, [&VarEmission]() {
        return VarEmission.getAllocatedAddress();
      });
    }
    ++I;
  }
  // Privatize extra loop counters used in loops for ordered(n) clauses.
  for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
    if (!C->getNumForLoops())
      continue;
    for (unsigned I = S.getCollapsedNumber(),
                  E = C->getLoopNumIterations().size();
         I < E; ++I) {
      const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
      const auto *VD = cast<VarDecl>(DRE->getDecl());
      // Override only those variables that have really been emitted already.
      if (LocalDeclMap.count(VD)) {
        (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
          return CreateMemTemp(DRE->getType(), VD->getName());
        });
      }
    }
  }
}

static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get initial values of real counters.
    for (const Expr *I : S.inits()) {
      CGF.EmitIgnoredExpr(I);
    }
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}

void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (const Expr *E : C->varlists()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [this, PrivateVD]() {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else {
        EmitVarDecl(*PrivateVD);
      }
      ++CurPrivate;
    }
  }
}

static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(/*Enable=*/false);
  }
}
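
// Illustrative sketch (editorial example, not from the original source): how
// the two clauses handled above differ. 'simdlen' is a preferred-vector-width
// hint; 'safelen' is a correctness bound, so memory accesses are not marked
// parallel when it is present:
//
//   #pragma omp simd simdlen(8)        // hint: vectorize 8 lanes wide
//   for (int i = 0; i < n; ++i) ...
//
//   #pragma omp simd safelen(4)        // no dependence spans >= 4 iterations
//   for (int i = 4; i < n; ++i)
//     a[i] = a[i - 4] + 1;             // carried dependence at distance 4
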
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk clauses and process safelen/simdlen.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable();
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}

void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (const Expr *F : D.finals()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    const auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (llvm::Value *Cond = CondGen(*this)) {
          // If the first post-update expression is found, emit a conditional
          // block if it was requested.
          llvm::BasicBlock *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED) {
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      } else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }
  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }
  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());
  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }
  CGF.EmitOMPSimdInit(S);
  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
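
// Illustrative sketch (editorial example, not from the original source): the
// overall shape emitOMPSimdRegion above emits for
//
//   #pragma omp simd
//   for (int i = lo; i < hi; ++i) body(i);
//
// roughly corresponds to
//
//   if (lo < hi) {                      // PreCond
//     int omp.iv = 0;
//     while (omp.iv < hi - lo) {        // marked llvm.loop.vectorize.enable
//       i = lo + omp.iv; body(i);
//       ++omp.iv;
//     }
//     /* final updates of counters/linear variables */
//   }
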
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));
  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop-sharing constructs (e.g.
    // 'distribute parallel for')
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }
  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");
  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);
  // Emit "IV = LB" (in the case of a static schedule, we have already
  // calculated the new LB for the loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);
  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without an ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);
  SourceLocation Loc = S.getBeginLoc();
  // When 'distribute' is not combined with a 'for':
  // while (idx <= UB) { BODY; ++idx; }
  // When 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for'):
  // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);
  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");
  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };
  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
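
// Illustrative sketch (editorial example, not from the original source): for
//
//   #pragma omp for schedule(dynamic, 4)
//   for (int i = 0; i < n; ++i) work(i);
//
// EmitOMPForOuterLoop above emits a dispatch-init call followed by the outer
// dispatch loop, roughly:
//
//   __kmpc_dispatch_init_4(&loc, gtid, kmp_sch_dynamic_chunked,
//                          0, n - 1, 1, 4);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st))
//     for (i = lb; i <= ub; ++i) work(i);
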
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
  // For combined 'distribute' and 'for', the increment expression of
  // distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();
  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();
  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());
  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);
  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference in the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized, as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
  llvm::Value *UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };
  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
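
// Illustrative sketch (editorial example, not from the original source): in
//
//   #pragma omp teams distribute parallel for
//   for (int i = 0; i < n; ++i) work(i);
//
// 'distribute' first splits the iteration space into per-team chunks
// [LB, UB]; those bounds are then passed as extra parameters into the
// outlined 'parallel for' (see the InnerBoundParams helper above), which
// worksharing-schedules only its team's chunk across the team's threads.
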
  2032. void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
  2033. const OMPDistributeParallelForSimdDirective &S) {
  2034. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
  2035. CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
  2036. S.getDistInc());
  2037. };
  2038. OMPLexicalScope Scope(*this, S, OMPD_parallel);
  2039. CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
  2040. }
  2041. void CodeGenFunction::EmitOMPDistributeSimdDirective(
  2042. const OMPDistributeSimdDirective &S) {
  2043. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
  2044. CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  2045. };
  2046. OMPLexicalScope Scope(*this, S, OMPD_unknown);
  2047. CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  2048. }
  2049. void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
  2050. CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  2051. // Emit SPMD target parallel for region as a standalone region.
  2052. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
  2053. emitOMPSimdRegion(CGF, S, Action);
  2054. };
  2055. llvm::Function *Fn;
  2056. llvm::Constant *Addr;
  2057. // Emit target region as a standalone region.
  2058. CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
  2059. S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  2060. assert(Fn && Addr && "Target device function emission failed.");
  2061. }
void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
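
/// Emit a worksharing loop ('for' and the worksharing part of combined
/// directives). EUB is the expression that clamps the upper bound to the
/// global upper bound; CodeGenLoopBounds and CGDispatchBounds abstract over
/// how the lower/upper bounds are materialized for static and dispatch
/// (dynamic/guided) schedules, respectively. Returns true if the directive
/// has a 'lastprivate' clause.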
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.
    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        llvm::APSInt EvaluatedChunk;
        if (ChunkExpr->EvaluateAsInt(EvaluatedChunk, getContext()))
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /*Chunked=*/Chunk != nullptr) &&
          HasChunkSizeOne &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /*Chunked=*/Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress(),
            StaticChunkedOne ? Chunk : nullptr);
        RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        if (!StaticChunkedOne)
          EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // For unchunked static schedule generate:
        //
        //   while (idx <= UB) {
        //     BODY;
        //     ++idx;
        //   }
        //
        // For static schedule with chunk one:
        //
        //   while (IV <= PrevUB) {
        //     BODY;
        //     IV += ST;
        //   }
        EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(),
            StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
            StaticChunkedOne ? S.getDistInc() : S.getInc(),
            [&S, LoopExit](CodeGenFunction &CGF) {
              CGF.EmitOMPLoopBody(S, LoopExit);
              CGF.EmitStopPoint(&S);
            },
            [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
    }
    EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}
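
// Lowers '#pragma omp for': the worksharing loop is emitted inline in the
// current region; the implicit barrier at the end is skipped only when a
// 'nowait' clause is present and there are no lastprivates to finalize.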
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
}
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
}
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}
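
// Common lowering for 'sections'-based directives: the construct is emitted
// as a static, non-chunked worksharing loop over the section index whose body
// is a switch dispatching to the individual sections. For example,
//
//   #pragma omp sections
//   {
//     #pragma omp section
//     foo();
//     #pragma omp section
//     bar();
//   }
//
// becomes a static loop over IV in [0, 1] around
// 'switch (IV) { case 0: foo(); break; case 1: bar(); break; }'.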
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getBeginLoc(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getBeginLoc(), true);
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      //
      //   switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      //   ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      //   }
      //   .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
}
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}
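
// Lowers '#pragma omp single'. 'copyprivate' variables are copied by the
// runtime's single-region support from the thread that executed the region
// to the other threads in the team, using the collected <source>,
// <destination> and <destination> = <source> helper expressions.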
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitCriticalRegion(
      *this, S.getDirectiveName().getAsString(), CodeGen, S.getBeginLoc(),
      Hint);
}
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                 emitEmptyBoundParameters);
}
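
/// Common codegen for 'task' and other task-based directives (the taskloop
/// family reuses it with its own BodyGen/TaskGen): collects the data-sharing
/// clauses into Data, emits the outlined task entry, and hands it to TaskGen,
/// which emits the actual runtime call.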
void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const Expr *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                          CopyFn, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        Data.ReductionCopies[Cnt]->getExprLoc()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
                                 TaskgroupDescriptors[Cnt]->getExprLoc());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
      }
    }
    (void)InRedScope.Privatize();

    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
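
/// Synthesize an implicit firstprivate variable of the given type for a
/// generated task and record the original/private/init references in Data so
/// that the task's copy function initializes the private copy from the
/// original.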
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamDecl::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamDecl::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamDecl::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_RValue));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}
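
// Emit a target-related directive as a task: the device pointer arrays (base
// pointers, pointers, sizes) are passed to the task as implicit firstprivate
// arrays, and the generated 'if' condition keeps the task undeferred unless
// a 'nowait' clause is present.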
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointersType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getSizeType(), ArrSize, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(
        BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
    TargetScope.addPrivate(PVD,
                           [&InputInfo]() { return InputInfo.PointersArray; });
    TargetScope.addPrivate(SVD,
                           [&InputInfo]() { return InputInfo.SizesArray; });
  }
  (void)TargetScope.Privatize();
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (const Expr *IRef : C->varlists())
      Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                          CopyFn, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());
    }
    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    BodyGen(CGF);
  };
  llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }
  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}
void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}
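
// Lowers '#pragma omp taskgroup'. If a 'task_reduction' clause is present,
// the reduction descriptor returned by the runtime is stored into the
// compiler-generated reduction-reference variable so that nested tasks with
// 'in_reduction' clauses can look their items up.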
void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const Expr *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(
      *this,
      [&S]() -> ArrayRef<const Expr *> {
        if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
          return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                    FlushClause->varlist_end());
        return llvm::None;
      }(),
      S.getBeginLoc());
}
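
/// Emit the 'distribute' loop that divides iterations among the teams of the
/// league. For combined constructs the combined bounds and conditions are
/// used, and CodeGenLoop emits the rest of the pragma for each chunk;
/// IncExpr is the increment expression of the outer loop.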
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }
  3106. CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
  3107. bool HasLastprivateClause = false;
  3108. // Check pre-condition.
  3109. {
  3110. OMPLoopScope PreInitScope(*this, S);
  3111. // Skip the entire loop if we don't meet the precondition.
  3112. // If the condition constant folds and can be elided, avoid emitting the
  3113. // whole loop.
  3114. bool CondConstant;
  3115. llvm::BasicBlock *ContBlock = nullptr;
  3116. if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
  3117. if (!CondConstant)
  3118. return;
  3119. } else {
  3120. llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
  3121. ContBlock = createBasicBlock("omp.precond.end");
  3122. emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
  3123. getProfileCount(&S));
  3124. EmitBlock(ThenBlock);
  3125. incrementProfileCounter(&S);
  3126. }
  3127. emitAlignedClause(*this, S);
  3128. // Emit 'then' code.
  3129. {
  3130. // Emit helper vars inits.
  3131. LValue LB = EmitOMPHelperVar(
  3132. *this, cast<DeclRefExpr>(
  3133. (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
  3134. ? S.getCombinedLowerBoundVariable()
  3135. : S.getLowerBoundVariable())));
  3136. LValue UB = EmitOMPHelperVar(
  3137. *this, cast<DeclRefExpr>(
  3138. (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
  3139. ? S.getCombinedUpperBoundVariable()
  3140. : S.getUpperBoundVariable())));
  3141. LValue ST =
  3142. EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
  3143. LValue IL =
  3144. EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
  3145. OMPPrivateScope LoopScope(*this);
  3146. if (EmitOMPFirstprivateClause(S, LoopScope)) {
  3147. // Emit implicit barrier to synchronize threads and avoid data races
  3148. // on initialization of firstprivate variables and post-update of
  3149. // lastprivate variables.
  3150. CGM.getOpenMPRuntime().emitBarrierCall(
  3151. *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
  3152. /*ForceSimpleCall=*/true);
  3153. }
  3154. EmitOMPPrivateClause(S, LoopScope);
  3155. if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
  3156. !isOpenMPParallelDirective(S.getDirectiveKind()) &&
  3157. !isOpenMPTeamsDirective(S.getDirectiveKind()))
  3158. EmitOMPReductionClauseInit(S, LoopScope);
  3159. HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
  3160. EmitOMPPrivateLoopCounters(S, LoopScope);
  3161. (void)LoopScope.Privatize();
  3162. if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
  3163. CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
  3164. // Detect the distribute schedule kind and chunk.
  3165. llvm::Value *Chunk = nullptr;
  3166. OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
  3167. if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
  3168. ScheduleKind = C->getDistScheduleKind();
  3169. if (const Expr *Ch = C->getChunkSize()) {
  3170. Chunk = EmitScalarExpr(Ch);
  3171. Chunk = EmitScalarConversion(Chunk, Ch->getType(),
  3172. S.getIterationVariable()->getType(),
  3173. S.getBeginLoc());
  3174. }
  3175. } else {
  3176. // Default behaviour for dist_schedule clause.
  3177. CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
  3178. *this, S, ScheduleKind, Chunk);
  3179. }
  3180. const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  3181. const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      bool StaticChunked = RT.isStaticChunked(
          ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr) ||
          StaticChunked) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress(),
            StaticChunked ? Chunk : nullptr);
        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                    StaticInit);
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());
        const Expr *Cond =
            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                ? S.getCombinedCond()
                : S.getCond();
        if (StaticChunked)
          Cond = S.getCombinedDistCond();

        // For static unchunked schedules generate:
        //
        //  1. For distribute alone, codegen
        //    while (idx <= UB) {
        //      BODY;
        //      ++idx;
        //    }
        //
        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
        //    while (idx <= UB) {
        //      <CodeGen rest of pragma>(LB, UB);
        //      idx += ST;
        //    }
        //
        // For a static schedule with chunk size one, generate:
        //
        // while (IV <= GlobalUB) {
        //   <CodeGen rest of pragma>(LB, UB);
        //   LB += ST;
        //   UB += ST;
        //   UB = min(UB, GlobalUB);
        //   IV = LB;
        // }
        //
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [&S, StaticChunked](CodeGenFunction &CGF) {
                           if (StaticChunked) {
                             CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
                             CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
                             CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
                             CGF.EmitIgnoredExpr(S.getCombinedInit());
                           }
                         });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind())) {
        EmitOMPReductionClauseFinal(S, OMPD_simd);
        // Emit post-update of the reduction variables if IsLastIter != 0.
        emitPostUpdateForReductionClause(
            *this, S, [IL, &S](CodeGenFunction &CGF) {
              return CGF.Builder.CreateIsNotNull(
                  CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
            });
      }
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
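
// Illustrative example (not from the original source): a combined
// bound-sharing directive such as
//
//   #pragma omp distribute parallel for dist_schedule(static, 4)
//   for (int i = 0; i < N; ++i)
//     body(i);
//
// takes the StaticChunked path above: each team receives a 4-iteration chunk,
// runs the inner 'parallel for' over [LB, UB], and then advances LB/UB by the
// stride and re-clamps UB before re-testing the combined distribute condition.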
void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->setDoesNotRecurse();
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (S.hasClausesOfKind<OMPDependClause>()) {
    assert(!S.getAssociatedStmt() &&
           "An 'ordered depend' construct must not have an associated "
           "statement.");
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  const auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    const CapturedStmt *CS = S.getInnermostCapturedStmt();
    if (C) {
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(CS->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
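
// Illustrative example (not from the original source): cross-iteration
// dependences are emitted as doacross runtime calls, with no associated
// statement on the depend form:
//
//   #pragma omp for ordered(1)
//   for (int i = 1; i < N; ++i) {
//     #pragma omp ordered depend(sink: i - 1)
//     a[i] = a[i - 1] + 1;
//     #pragma omp ordered depend(source)
//   }
//
// By contrast, 'ordered simd' outlines the region body and calls it directly,
// and a plain 'ordered' wraps the body in an ordered runtime region.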
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar() ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                                   DestType, Loc)
                        : CGF.EmitComplexToScalarConversion(
                              Val.getComplexVal(), SrcType, DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    llvm::Value *ScalarVal = CGF.EmitScalarConversion(
        Val.getScalarVal(), SrcType, DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    QualType DestElementType =
        DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}
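
// Illustrative example (not from the original source):
//
//   #pragma omp atomic read seq_cst
//   v = x;
//
// loads 'x' with an atomic (seq_cst) load, emits the implicit flush required
// by the seq_cst clause, and then stores the result to 'v' with an ordinary
// non-atomic store, converting to v's type if necessary.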
static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomic operations are supported for
  // the given type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_Cmp:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  llvm::Value *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  llvm::Value *Res =
      CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
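
// Illustrative examples (not from the original source), assuming an 'int x':
//
//   #pragma omp atomic update
//   x += 1;              // BO_Add lowers to 'atomicrmw add'
//
//   #pragma omp atomic capture
//   { v = x; x = expr; } // BO_Assign lowers to 'atomicrmw xchg'
//
// Updates with no atomicrmw equivalent (e.g. 'x *= 2'), non-integer operands,
// or types without builtin atomic support return {false, nullptr}, and the
// caller falls back to the compare-and-swap path below.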
std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr;      -> xrval binop expr;
  // x++, ++x            -> xrval + 1;
  // x--, --x            -> xrval - 1;
  // x = x binop expr;   -> xrval binop expr;
  // x = expr binop x;   -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr;      -> xrval binop expr;
  // x++, ++x            -> xrval + 1;
  // x--, --x            -> xrval - 1;
  // x = x binop expr;   -> xrval binop expr;
  // x = expr binop x;   -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
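
// Illustrative example (not from the original source): for a non-integer 'x'
// the atomicrmw fast path is rejected, so
//
//   double x;
//   #pragma omp atomic update
//   x = 2.0 * x;
//
// is emitted via EmitAtomicUpdate as a compare-and-swap loop: load x, compute
// '2.0 * xrval' through the opaque-value mapping in 'Gen', then cmpxchg until
// the exchange succeeds.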
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  llvm::AtomicOrdering AO = IsSeqCst
                                ? llvm::AtomicOrdering::SequentiallyConsistent
                                : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr;      -> xrval binop expr;
    // x++, ++x            -> xrval + 1;
    // x--, --x            -> xrval - 1;
    // x = x binop expr;   -> xrval binop expr;
    // x = expr binop x;   -> expr binop xrval;
    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide the new value, so evaluate it using the
        // old value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
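
// Illustrative example (not from the original source): the two capture shapes
// differ only in which value lands in 'v':
//
//   #pragma omp atomic capture
//   { v = x; x += e; }   // postfix: v receives the OLD value of x
//
//   #pragma omp atomic capture
//   { x += e; v = x; }   // prefix: v receives the NEW value of x
//
// When an atomicrmw is emitted it returns the old value, so the new value for
// the prefix form is re-derived by evaluating the update expression with the
// returned old value substituted for 'x'.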
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
  case OMPC_unified_address:
  case OMPC_unified_shared_memory:
  case OMPC_reverse_offload:
  case OMPC_dynamic_allocators:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}

void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (const OMPClause *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }

  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
  if (const auto *FE = dyn_cast<FullExpr>(CS))
    enterFullExpression(FE);
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const Stmt *C : Compound->body()) {
      if (const auto *FE = dyn_cast<FullExpr>(C))
        enterFullExpression(FE);
    }
  }

  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getBeginLoc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;

  // On device emit this construct as inlined code.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    OMPLexicalScope Scope(CGF, S, OMPD_target);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
        });
    return;
  }

  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;

  const Expr *IfCond = nullptr;
  // Check for at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;

  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));

  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}
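
// Illustrative example (not from the original source): with
//
//   #pragma omp target if(0) device(1)
//   { ... }
//
// the 'if' clause constant-folds to false, so IsOffloadEntry becomes false:
// the outlined function is still emitted (the region must still run on the
// host), but no offload entry is registered, and emitTargetCall receives the
// folded condition so only the host fallback path is taken.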
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  Action.Enter(CGF);
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
    CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
  CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  llvm::Value *OutlinedFn =
      CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
          S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);

  const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
  const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
  }

  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
}
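
// Illustrative example (not from the original source):
//
//   #pragma omp teams num_teams(4) thread_limit(8)
//   { ... }
//
// outlines the teams region, forwards the num_teams/thread_limit expressions
// to the runtime through emitNumTeamsClause, and then emits the teams fork via
// emitTeamsCall with the captured variables as arguments.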
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsRegion(CodeGenFunction &CGF,
                                  PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
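
// Illustrative example (not from the original source): for
//
//   #pragma omp teams distribute
//   for (int i = 0; i < N; ++i) ...
//
// the teams region is outlined as above and the 'distribute' part is emitted
// inside it as an inlined directive, so the distribute loop runs once per
// team with team-specific lower/upper bounds.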
void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
static void emitTargetTeamsDistributeParallelForRegion(
    CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  // Emit SPMD target teams distribute parallel for region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeParallelForSimdRegion(
    CodeGenFunction &CGF,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  // Emit SPMD target teams distribute parallel for simd region as a standalone
  // region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
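
// Illustrative example (not from the original source):
//
//   #pragma omp for
//   for (int i = 0; i < N; ++i) {
//     if (err) {
//       #pragma omp cancel for
//     }
//   }
//
// Cancellation of 'parallel', 'task', and 'target parallel' regions branches
// to the function's return block, while worksharing-loop kinds such as
// 'cancel for' branch to the exit block kept on OMPCancelStack (see
// getOMPCancelDestination above).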
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (const Expr *PvtVarIt : C.private_copies()) {
    const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());

    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      const auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }

    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;

    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
                                                         InitAddrIt, InitVD,
                                                         PvtVD]() {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);

      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);

      // The initialization variable has served its purpose in the emission
      // of the previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);

      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    ++OrigVarIt;
    ++InitIt;
  }
}
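
// Illustrative example (not from the original source; some_device_api is a
// hypothetical device-side function):
//
//   int *p = (int *)malloc(N * sizeof(int));
//   #pragma omp target data map(tofrom: p[0:N]) use_device_ptr(p)
//   {
//     some_device_api(p); // here 'p' holds the device address
//   }
//
// The runtime hands back the device address through CaptureDeviceAddrMap; the
// code above rebinds the private copy of 'p' to that address for the duration
// of the region body.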
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);

  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code generation
  // to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);

  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
    };

    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;

      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);

      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else {
        RCG(CGF);
      }
    };

    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);

    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope, because changes to references inside the
    // region are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };

  RegionCodeGenTy RCG(CodeGen);

  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);

  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
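
// Illustrative example (not from the original source): the unstructured pair
//
//   #pragma omp target enter data map(to: a[0:N])
//   ...
//   #pragma omp target exit data map(from: a[0:N])
//
// is emitted as two standalone runtime calls with no region body, each guarded
// by the optional 'if' and 'device' clause values gathered above.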
  4415. static void emitTargetParallelRegion(CodeGenFunction &CGF,
  4416. const OMPTargetParallelDirective &S,
  4417. PrePostActionTy &Action) {
  4418. // Get the captured statement associated with the 'parallel' region.
  4419. const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  4420. Action.Enter(CGF);
  4421. auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
  4422. Action.Enter(CGF);
  4423. CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  4424. (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  4425. CGF.EmitOMPPrivateClause(S, PrivateScope);
  4426. CGF.EmitOMPReductionClauseInit(S, PrivateScope);
  4427. (void)PrivateScope.Privatize();
  4428. if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
  4429. CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
  4430. // TODO: Add support for clauses.
  4431. CGF.EmitStmt(CS->getCapturedStmt());
  4432. CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  4433. };
  4434. emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
  4435. emitEmptyBoundParameters);
  4436. emitPostUpdateForReductionClause(CGF, S,
  4437. [](CodeGenFunction &) { return nullptr; });
  4438. }
  4439. void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
  4440. CodeGenModule &CGM, StringRef ParentName,
  4441. const OMPTargetParallelDirective &S) {
  4442. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
  4443. emitTargetParallelRegion(CGF, S, Action);
  4444. };
  4445. llvm::Function *Fn;
  4446. llvm::Constant *Addr;
  4447. // Emit target region as a standalone region.
  4448. CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
  4449. S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  4450. assert(Fn && Addr && "Target device function emission failed.");
  4451. }
  4452. void CodeGenFunction::EmitOMPTargetParallelDirective(
  4453. const OMPTargetParallelDirective &S) {
  4454. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
  4455. emitTargetParallelRegion(CGF, S, Action);
  4456. };
  4457. emitCommonOMPTargetDirective(*this, S, CodeGen);
  4458. }
  4459. static void emitTargetParallelForRegion(CodeGenFunction &CGF,
  4460. const OMPTargetParallelForDirective &S,
  4461. PrePostActionTy &Action) {
  4462. Action.Enter(CGF);
  4463. // Emit directive as a combined directive that consists of two implicit
  4464. // directives: 'parallel' with 'for' directive.
  4465. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
  4466. Action.Enter(CGF);
  4467. CodeGenFunction::OMPCancelStackRAII CancelRegion(
  4468. CGF, OMPD_target_parallel_for, S.hasCancel());
  4469. CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
  4470. emitDispatchForLoopBounds);
  4471. };
  4472. emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
  4473. emitEmptyBoundParameters);
  4474. }
void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

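// Editor's note: this mirrors emitTargetParallelForRegion above, except that
// no OMPCancelStackRAII is installed (cancellation is not permitted in simd
// regions) and the innermost kind handed to emitCommonOMPParallelDirective is
// OMPD_simd rather than OMPD_for.
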
void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

/// Map the helper variable for the given captured region parameter to that
/// parameter's address in the enclosing private scope.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl,
                      [&CGF, PVD]() { return CGF.GetAddrOfLocalVar(PVD); });
}

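// Editor's note: the outlined taskloop function receives the lower bound,
// upper bound, stride, and last-iteration flag as implicit parameters (see
// the LowerBound = 5 enum in EmitOMPTaskLoopBasedDirective below); mapParam
// redirects the helper variable Sema created for each of them to the
// corresponding parameter slot, so the loop codegen reads the values the
// runtime passes in.
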
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }
  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //
    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }
    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);
    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());
    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getBeginLoc())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
                      &Data](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup) {
    EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
  } else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                        Data);
        },
        S.getBeginLoc());
  }
}

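// Editor's note on the Schedule encoding above (illustrative): Data.Schedule
// is a pointer/int pair recording which scheduling clause, if any, appeared:
//
//   #pragma omp taskloop grainsize(G)   -> {Int = false, Pointer = emit(G)}
//   #pragma omp taskloop num_tasks(N)   -> {Int = true,  Pointer = emit(N)}
//
// emitTaskLoopCall then forwards this choice to the runtime's taskloop entry
// point (__kmpc_taskloop in libomp) so the runtime can split the iteration
// space accordingly.
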
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;
  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();
  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();
  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

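// Illustrative sketch (editor's addition): a directive such as
//
//   #pragma omp target update to(a) from(b) if(cond) device(dev)
//
// becomes a (possibly guarded) call into the offloading runtime that copies
// 'a' to and 'b' from device 'dev'; with no offload targets configured the
// directive is a no-op, per the early return above.
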
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      OMPPrivateScope LoopGlobals(CGF);
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            LoopGlobals.addPrivate(
                VD, [&GlobLVal]() { return GlobLVal.getAddress(); });
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getCollapsedNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      LoopGlobals.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  OMPSimdLexicalScope Scope(*this, D);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this,
      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                  : D.getDirectiveKind(),
      CodeGen);
}
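
// Editor's note on LoopGlobals above: loop counters that are global variables
// are re-registered under their existing address so that references in the
// emitted body resolve through LocalDeclMap like locals, while counters that
// Sema synthesized (OMPCapturedExprDecl) are emitted on demand when no clause
// has already materialized them.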