//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/DeclOpenMP.h"
#include "llvm/IR/CallSite.h"

using namespace clang;
using namespace CodeGen;

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>())
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
  }

public:
  OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                  bool AsInlined = false, bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (AsInlined) {
      if (S.hasAssociatedStmt()) {
        auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
        for (auto &C : CS->captures()) {
          if (C.capturesVariable() || C.capturesVariableByCopy()) {
            auto *VD = C.getCapturedVar();
            assert(VD == VD->getCanonicalDecl() &&
                   "Canonical decl must be captured.");
            DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                            isCapturedVar(CGF, VD) ||
                                (CGF.CapturedStmtInfo &&
                                 InlinedShareds.isGlobalVarCaptured(VD)),
                            VD->getType().getNonReferenceType(), VK_LValue,
                            SourceLocation());
            InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
              return CGF.EmitLValue(&DRE).getAddress();
            });
          }
        }
        (void)InlinedShareds.Privatize();
      }
    }
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S,
                        /*AsInlined=*/false,
                        /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing of
/// expressions used in the loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    for (auto *E : S.counters()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      (void)PreCondScope.addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType().getNonReferenceType());
      });
    }
    (void)PreCondScope.Privatize();
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
        for (const auto *I : PreInits->decls())
          CGF.EmitVarDecl(cast<VarDecl>(*I));
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};
} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);
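
/// Emit an l-value for expression \a E. For a reference to a variable that is
/// captured by the enclosing lambda, captured statement, or block, the
/// DeclRefExpr is re-emitted with RefersToEnclosingVariableOrCapture set so
/// that the address of the captured copy is produced.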
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}
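
/// Compute the size of \a Ty in bytes as an llvm::Value. For constant-sized
/// types this is a compile-time constant; for variably-modified types the
/// captured VLA dimensions are multiplied in at run time (e.g., for
/// 'double[n][m]' this yields, roughly, n * m * sizeof(double)).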
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  auto &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (auto *VAT = C.getAsVariableArrayType(Ty)) {
      llvm::Value *ArraySize;
      std::tie(ArraySize, Ty) = getVLASize(VAT);
      Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  } else
    Size = CGM.getSize(SizeInChars);
  return Size;
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      auto VAT = CurField->getCapturedVLAType();
      auto *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis())
      CapturedVars.push_back(CXXThisValue);
    else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV =
          EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();

      // If the field is not a pointer, we need to save the actual value
      // and reload it as a uintptr-sized value.
      if (!CurField->getType()->isAnyPointerType()) {
        auto &Ctx = getContext();
        auto DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName()) + ".casted");
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        auto *SrcAddrVal = EmitScalarConversion(
            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), SourceLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
    }
  }
}
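
/// Reinterpret an argument slot holding a uintptr-encoded value as an address
/// of type \a DstType. For a reference-typed destination, a fresh temporary
/// holding the pointer is created so that the address of the reference itself
/// can be returned.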
static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
                                    StringRef Name, LValue AddrLV,
                                    bool isReferenceType = false) {
  ASTContext &Ctx = CGF.getContext();

  auto *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), SourceLocation());
  auto TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
          .getAddress();

  // If we are dealing with references we need to return the address of the
  // reference instead of the address of the value.
  if (isReferenceType) {
    QualType RefType = Ctx.getLValueReferenceType(DstType);
    auto *RefVal = TmpAddr.getPointer();
    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
    auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
  }

  return TmpAddr;
}
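
/// Build a canonical parameter type from the type of a captured field,
/// recursing through references, pointers, and array types so that no
/// variably-modified type ends up in the outlined function's signature.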
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType()) {
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  }
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (auto *A = T->getAsArrayTypeUnsafe()) {
    if (auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    else if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly,
                           StringRef FunctionName)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName) {}
};
} // namespace
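
/// Create the outlined helper function for a captured statement and emit its
/// prologue: build the argument list from the captured fields, map each
/// argument back to the variable it captures (in \a LocalAddrs) or to the VLA
/// size expression it carries (in \a VLASizes), and start the function body.
/// Returns the newly created function.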
static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
        SourceLocation(), DeclarationName(), Ctx.VoidTy,
        Ctx.getTrivialTypeSourceInfo(
            Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
        SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
  }
  for (auto *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly cast to
    // uintptr. This is necessary given that the runtime library is only able
    // to deal with pointers. We can pass in the same way the VLA type sizes to
    // the outlined function.
    if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
        I->capturesVariableArrayType()) {
      if (FO.UIntPtrCastRequired)
        ArgType = Ctx.getUIntPtrType();
    }

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis())
      II = &Ctx.Idents.get("this");
    else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getLocStart() : FD->getLocStart(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamDecl::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  llvm::Function *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.S->getLocStart(), CD->getBody()->getLocStart());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (auto *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      // If the variable is a reference we need to materialize it here.
      if (CurVD->getType()->isReferenceType()) {
        Address RefAddr = CGF.CreateMemTemp(
            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
                              /*Volatile=*/false, CurVD->getType());
        LocalAddr = RefAddr;
      }
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
                                                          Args[Cnt]->getName(),
                                                          ArgLVal),
                                     FD->getType(), AlignmentSource::Decl);
      }
      auto *ExprArg =
          CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
    } else if (I->capturesVariable()) {
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (!VarTy->isReferenceType()) {
        if (ArgLVal.getType()->isLValueReferenceType()) {
          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
        } else if (!VarTy->isVariablyModifiedType() ||
                   !VarTy->isPointerType()) {
          assert(ArgLVal.getType()->isPointerType());
          ArgAddr = CGF.EmitLoadOfPointer(
              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
        }
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt],
             {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      auto *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      LocalAddrs.insert(
          {Args[Cnt],
           {Var,
            FO.UIntPtrCastRequired
                ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
                                       ArgLVal, VarTy->isReferenceType())
                : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
                         .getScalarVal();
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}
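
/// Generate the outlined function for a captured statement. When debug info
/// at limited level or above is requested, the body is emitted into a
/// "_debug__"-suffixed function with the original parameter types, and a thin
/// wrapper with the runtime-friendly uintptr signature is generated that
/// simply forwards its arguments to it.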
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() &&
      CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str());
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      setAddrOfLocalVar(LocalAddrPair.second.first,
                        LocalAddrPair.second.second);
    }
  }
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName());
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end())
        CallArg = EI->second.second;
      else {
        LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                              Arg->getType(),
                                              AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
                                                  F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
//                              OpenMP Directive Emission
//===----------------------------------------------------------------------===//
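
/// Emit an element-by-element copy loop over two arrays of \a OriginalType,
/// calling \a CopyGen for each destination/source element pair. The emitted
/// control flow is, roughly:
///
///   if (DestBegin == DestEnd) goto omp.arraycpy.done;
///   omp.arraycpy.body:
///     CopyGen(DestElement, SrcElement);
///     advance both element pointers; loop unless DestEnd was reached
///   omp.arraycpy.done: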
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> &CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  auto SrcBegin = SrcAddr.getPointer();
  auto DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a do-while loop guarded by an initial
  // emptiness check.
  auto BodyBB = createBasicBlock("omp.arraycpy.body");
  auto DoneBB = createBasicBlock("omp.arraycpy.done");
  auto IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  auto EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI,
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  auto DestElementNext = Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  auto SrcElementNext = Builder.CreateConstGEP1_32(
      SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  auto Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}
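
/// Emit a copy of \a OriginalType from \a SrcAddr to \a DestAddr using the
/// copy expression \a Copy. Plain array assignments become a single aggregate
/// copy; all other cases are emitted by temporarily remapping \a DestVD and
/// \a SrcVD to the given addresses (or array elements) and evaluating \a Copy.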
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // We are working with a single array element, so we have to remap
            // the destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, [DestElement]() -> Address {
              return DestElement;
            });
            Remap.addPrivate(
                SrcVD, [SrcElement]() -> Address { return SrcElement; });
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
    Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}
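
/// Emit initialization of the private copies for variables listed in
/// 'firstprivate' clauses of \a D and register them in \a PrivateScope.
/// Returns true if at least one emitted firstprivate variable is also
/// lastprivate, in which case the caller must emit the final copy-back.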
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool FirstprivateIsLastprivate = false;
  llvm::DenseSet<const VarDecl *> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.insert(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto InitsRef = C->inits().begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      auto *CapFD = CapturesInfo.lookup(OrigVD);
      auto *FD = CapturedStmtInfo->lookup(OrigVD);
      if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
          !FD->getType()->isReferenceType()) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        Address OriginalAddr = EmitLValue(&DRE).getAddress();
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            auto Emission = EmitAutoVarAlloca(*VD);
            auto *Init = VD->getInit();
            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
              // Perform simple memcpy.
              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
                                  Type);
            } else {
              EmitOMPAggregateAssign(
                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                  [this, VDInit, Init](Address DestElement,
                                       Address SrcElement) {
                    // Clean up any temporaries needed by the initialization.
                    RunCleanupsScope InitScope(*this);
                    // Emit initialization for single element.
                    setAddrOfLocalVar(VDInit, SrcElement);
                    EmitAnyExprToMem(Init, DestElement,
                                     Init->getType().getQualifiers(),
                                     /*IsInitializer*/ false);
                    LocalDeclMap.erase(VDInit);
                  });
            }
            EmitAutoVarCleanups(Emission);
            return Emission.getAllocatedAddress();
          });
        } else {
          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            // Remap the temp VDInit variable to the address of the original
            // variable (for proper handling of captured global variables).
            setAddrOfLocalVar(VDInit, OriginalAddr);
            EmitDecl(*VD);
            LocalDeclMap.erase(VDInit);
            return GetAddrOfLocalVar(VD);
          });
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}
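
/// Emit the private copies for variables listed in 'private' clauses of \a D
/// and register them in \a PrivateScope. Each variable is emitted at most
/// once, even if it appears in several clauses.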
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, [&]() -> Address {
              // Emit private VarDecl with copy init.
              EmitDecl(*VD);
              return GetAddrOfLocalVar(VD);
            });
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}
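
/// Emit copying of 'copyin' threadprivate variables from the master thread's
/// copies to the copies of all other threads, as sketched in the comment in
/// the body below. Returns true if at least one copy was emitted, in which
/// case the caller is responsible for the trailing barrier.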
bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
                          VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread; if it
          // is, no data needs to be copied.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          Builder.CreateCondBr(
              Builder.CreateICmpNE(
                  Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
                  Builder.CreatePtrToInt(PrivateAddr.getPointer(),
                                         CGM.IntPtrTy)),
              CopyBegin, CopyEnd);
          EmitBlock(CopyBegin);
        }
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}
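
/// Emit initialization of the private copies for variables listed in
/// 'lastprivate' clauses of \a D, remembering the addresses of the original
/// variables for the final copy-back. Returns true if the directive has at
/// least one 'lastprivate' clause.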
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
      break;
    auto IRef = C->varlist_begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
          DeclRefExpr DRE(
              const_cast<VarDecl *>(OrigVD),
              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
                  OrigVD) != nullptr,
              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          return EmitLValue(&DRE).getAddress();
        });
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            return GetAddrOfLocalVar(VD);
          });
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}
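
/// Emit the final copy-back from the lastprivate copies to the original
/// variables. If \a IsLastIterCond is non-null, the stores are guarded by it
/// so that only the thread that executed the last iteration copies back; for
/// loop-based directives the counters' final values are emitted first unless
/// \a NoFinals is set.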
void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (auto F : LoopDirective->finals()) {
      auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (auto *AssignOp : C->assignment_ops()) {
      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr =
              Address(Builder.CreateLoad(PrivateAddr),
                      getNaturalTypeAlignment(RefTy->getPointeeType()));
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
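// NOTE (editorial, illustrative sketch; not part of the original source):
// for a worksharing loop, the code emitted above is conceptually:
//
//   if (.omp.is_last != 0) {        // .omp.lastprivate.then
//     i = <final counter value>;    // only for loop control variables
//     x = x.private;                // one EmitOMPCopy per lastprivate var
//   }                               // .omp.lastprivate.done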
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Shareds.emplace_back(Ref);
      Privates.emplace_back(*IPriv);
      ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto ILHS = LHSs.begin();
  auto IRHS = RHSs.begin();
  auto IPriv = Privates.begin();
  for (const auto *IRef : Shareds) {
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    auto Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered = PrivateScope.addPrivate(
        RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
    auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
        return GetAddrOfLocalVar(PrivateVD);
      });
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
        return RedCG.getSharedLValue(Count).getAddress();
      });
      PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
        return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
                                            ConvertTypeForMem(RHSVD->getType()),
                                            "rhs.begin");
      });
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr = Builder.CreateElementBitCast(
            OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
      }
      PrivateScope.addPrivate(
          LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
      PrivateScope.addPrivate(
          RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
            return IsArray
                       ? Builder.CreateElementBitCast(
                             GetAddrOfLocalVar(PrivateVD),
                             ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
                       : GetAddrOfLocalVar(PrivateVD);
          });
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
}
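// NOTE (editorial, illustrative example; not part of the original source):
// EmitOMPReductionClauseInit above sets up the privates for a directive
// such as:
//
//   #pragma omp parallel for reduction(+ : sum)
//   for (int i = 0; i < n; ++i)
//     sum += a[i];
//
// Each thread's 'sum' becomes a private copy initialized with the reduction
// identity (0 for '+'); the combination into the original variable is
// emitted later by EmitOMPReductionClauseFinal.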
void CodeGenFunction::EmitOMPReductionClauseFinal(
    const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
  if (!HaveInsertPoint())
    return;
  llvm::SmallVector<const Expr *, 8> Privates;
  llvm::SmallVector<const Expr *, 8> LHSExprs;
  llvm::SmallVector<const Expr *, 8> RHSExprs;
  llvm::SmallVector<const Expr *, 8> ReductionOps;
  bool HasAtLeastOneReduction = false;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    HasAtLeastOneReduction = true;
    Privates.append(C->privates().begin(), C->privates().end());
    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
  }
  if (HasAtLeastOneReduction) {
    bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
                      D.getDirectiveKind() == OMPD_simd;
    bool SimpleReduction = D.getDirectiveKind() == OMPD_simd ||
                           D.getDirectiveKind() == OMPD_distribute_simd;
    // Emit a nowait reduction if the nowait clause is present or if the
    // directive is a parallel directive (which always has an implicit
    // barrier).
    CGM.getOpenMPRuntime().emitReduction(
        *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
        {WithNowait, SimpleReduction, ReductionKind});
  }
}
static void emitPostUpdateForReductionClause(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (auto *PostUpdate = C->getPostUpdateExpr()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(CGF)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
          auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
          DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
          CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          CGF.EmitBlock(ThenBB);
        }
      }
      CGF.EmitIgnoredExpr(PostUpdate);
    }
  }
  if (DoneBB)
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
namespace {
/// Codegen lambda for appending distribute lower and upper bounds to outlined
/// parallel function. This is necessary for combined constructs such as
/// 'distribute parallel for'.
typedef llvm::function_ref<void(CodeGenFunction &,
                                const OMPExecutableDirective &,
                                llvm::SmallVectorImpl<llvm::Value *> &)>
    CodeGenBoundParametersTy;
} // anonymous namespace
static void emitCommonOMPParallelDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    const CodeGenBoundParametersTy &CodeGenBoundParameters) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                         /*IgnoreResultAssign=*/true);
    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
        CGF, NumThreads, NumThreadsClause->getLocStart());
  }
  if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
    CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
    CGF.CGM.getOpenMPRuntime().emitProcBindClause(
        CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
  }
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_parallel) {
      IfCond = C->getCondition();
      break;
    }
  }
  OMPParallelScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
  // lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
  CodeGenBoundParameters(CGF, S, CapturedVars);
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
                                              CapturedVars, IfCond);
}
static void emitEmptyBoundParameters(CodeGenFunction &,
                                     const OMPExecutableDirective &,
                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
  // Emit the parallel region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    bool Copyins = CGF.EmitOMPCopyinClause(S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    if (Copyins) {
      // Emit an implicit barrier to synchronize threads and avoid data races
      // when propagating the master thread's values of threadprivate
      // variables to the local instances of those variables in all other
      // implicit threads.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
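// NOTE (editorial, illustrative example; not part of the original source):
// with the libomp runtime, a directive such as
//
//   #pragma omp parallel if(c) num_threads(4)
//   { /* body */ }
//
// is lowered to an outlined function invoked through __kmpc_fork_call, with
// a serialized fallback path taken when the 'if' clause evaluates to false.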
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
                                      JumpDest LoopExit) {
  RunCleanupsScope BodyScope(*this);
  // Update the counters' values on the current iteration.
  for (auto I : D.updates())
    EmitIgnoredExpr(I);
  // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear, so
  // there is no need to generate code for them.
  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
    for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
      for (auto *U : C->updates())
        EmitIgnoredExpr(U);
    }
  }
  // On a continue in the body, jump to the end.
  auto Continue = getJumpDestInCurrentScope("omp.body.continue");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Emit the loop body.
  EmitStmt(D.getBody());
  // The end (updates/cleanups).
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
}
void CodeGenFunction::EmitOMPInnerLoop(
    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
    const Expr *IncExpr,
    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
  auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.inner.for.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));
  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (RequiresCleanup)
    ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
  auto LoopBody = createBasicBlock("omp.inner.for.body");
  // Emit the condition.
  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);
  incrementProfileCounter(&S);
  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  BodyGen(*this);
  // Emit "IV = IV + 1" and a back-edge to the condition block.
  EmitBlock(Continue.getBlock());
  EmitIgnoredExpr(IncExpr);
  PostIncGen(*this);
  BreakContinueStack.pop_back();
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
}
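// NOTE (editorial, illustrative sketch; not part of the original source):
// the block structure emitted by EmitOMPInnerLoop is:
//
//   omp.inner.for.cond: br <LoopCond>, omp.inner.for.body, omp.inner.for.end
//   omp.inner.for.body: <BodyGen>;  br omp.inner.for.inc
//   omp.inner.for.inc:  IV = IV + 1; <PostIncGen>; br omp.inner.for.cond
//   omp.inner.for.end:  <fall-through>
//
// with an extra omp.inner.for.cond.cleanup block staged before the exit when
// the enclosing scope requires cleanups.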
bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // Emit inits for the linear variables.
  bool HasLinears = false;
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    for (auto *Init : C->inits()) {
      HasLinears = true;
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
        AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
        auto *OrigVD = cast<VarDecl>(Ref->getDecl());
        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        VD->getInit()->getType(), VK_LValue,
                        VD->getInit()->getExprLoc());
        EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
                                                VD->getType()),
                       /*capturedByInit=*/false);
        EmitAutoVarCleanups(Emission);
      } else
        EmitVarDecl(*VD);
    }
    // Emit the linear steps for the linear clauses.
    // If a step is not constant, it is pre-calculated before the loop.
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit the calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }
  return HasLinears;
}
void CodeGenFunction::EmitOMPLinearClauseFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  // Emit the final values of the linear variables.
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto IC = C->varlist_begin();
    for (auto *F : C->finals()) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.linear.pu");
          DoneBB = createBasicBlock(".omp.linear.pu.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
      Address OrigAddr = EmitLValue(&DRE).getAddress();
      CodeGenFunction::OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
      ++IC;
    }
    if (auto *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
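// NOTE (editorial, illustrative example; not part of the original source):
// the two functions above handle a directive such as:
//
//   #pragma omp simd linear(j : 2)
//   for (int i = 0; i < n; ++i)
//     a[j] = b[i];
//
// 'j' gets a private copy initialized from the original variable, advances by
// the (possibly pre-calculated) step on each iteration, and its computed
// final value is written back to the original 'j' after the loop.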
static void emitAlignedClause(CodeGenFunction &CGF,
                              const OMPExecutableDirective &D) {
  if (!CGF.HaveInsertPoint())
    return;
  for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
    unsigned ClauseAlignment = 0;
    if (auto AlignmentExpr = Clause->getAlignment()) {
      auto AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
    }
    for (auto E : Clause->varlists()) {
      unsigned Alignment = ClauseAlignment;
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
             "alignment is not a power of 2");
      if (Alignment != 0) {
        llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
        CGF.EmitAlignmentAssumption(PtrValue, Alignment);
      }
    }
  }
}
void CodeGenFunction::EmitOMPPrivateLoopCounters(
    const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
  if (!HaveInsertPoint())
    return;
  auto I = S.private_counters().begin();
  for (auto *E : S.counters()) {
    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
    (void)LoopScope.addPrivate(VD, [&]() -> Address {
      // Emit the variable without initialization.
      if (!LocalDeclMap.count(PrivateVD)) {
        auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
        EmitAutoVarCleanups(VarEmission);
      }
      DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      (*I)->getType(), VK_LValue, (*I)->getExprLoc());
      return EmitLValue(&DRE).getAddress();
    });
    if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
        VD->hasGlobalStorage()) {
      (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
        DeclRefExpr DRE(const_cast<VarDecl *>(VD),
                        LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
                        E->getType(), VK_LValue, E->getExprLoc());
        return EmitLValue(&DRE).getAddress();
      });
    }
    ++I;
  }
}
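// NOTE (editorial, illustrative example; not part of the original source):
// emitAlignedClause above handles a directive such as:
//
//   #pragma omp simd aligned(p : 32)
//
// emitting an alignment assumption on 'p' (via EmitAlignmentAssumption) so
// the vectorizer may use aligned loads and stores; without an explicit value
// the target's default SIMD alignment for the pointee type is assumed.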
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
  if (!CGF.HaveInsertPoint())
    return;
  {
    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
    (void)PreCondScope.Privatize();
    // Get the initial values of the real counters.
    for (auto I : S.inits())
      CGF.EmitIgnoredExpr(I);
  }
  // Check that the loop is executed at least once.
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
}
void CodeGenFunction::EmitOMPLinearClause(
    const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (auto *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
    auto CurPrivate = C->privates().begin();
    for (auto *E : C->varlists()) {
      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
      if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
        bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
          // Emit private VarDecl with copy init.
          EmitVarDecl(*PrivateVD);
          return GetAddrOfLocalVar(PrivateVD);
        });
        assert(IsRegistered && "linear var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      } else
        EmitVarDecl(*PrivateVD);
      ++CurPrivate;
    }
  }
}
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     bool IsMonotonic) {
  if (!CGF.HaveInsertPoint())
    return;
  if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    if (!IsMonotonic)
      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
  } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
    RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                 /*ignoreResult=*/true);
    llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
    CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried
    // dependences of 'safelen' iterations are possible.
    CGF.LoopStack.setParallel(false);
  }
}
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
                                      bool IsMonotonic) {
  // Walk the clauses and process the simdlen/safelen information.
  LoopStack.setParallel(!IsMonotonic);
  LoopStack.setVectorizeEnable(true);
  emitSimdlenSafelenClause(*this, D, IsMonotonic);
}
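// NOTE (editorial, illustrative example; not part of the original source):
// for a directive such as
//
//   #pragma omp simd safelen(8)
//
// the code above caps the vectorization width at 8 and leaves the loop marked
// non-parallel, since iterations up to 8 apart may still carry dependences;
// 'simdlen' sets the preferred width without implying that restriction.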
void CodeGenFunction::EmitOMPSimdFinal(
    const OMPLoopDirective &D,
    const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
  if (!HaveInsertPoint())
    return;
  llvm::BasicBlock *DoneBB = nullptr;
  auto IC = D.counters().begin();
  auto IPC = D.private_counters().begin();
  for (auto F : D.finals()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
    auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
    auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
    if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
        OrigVD->hasGlobalStorage() || CED) {
      if (!DoneBB) {
        if (auto *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
          auto *ThenBB = createBasicBlock(".omp.final.then");
          DoneBB = createBasicBlock(".omp.final.done");
          Builder.CreateCondBr(Cond, ThenBB, DoneBB);
          EmitBlock(ThenBB);
        }
      }
      Address OrigAddr = Address::invalid();
      if (CED)
        OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
      else {
        DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
                        /*RefersToEnclosingVariableOrCapture=*/false,
                        (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
        OrigAddr = EmitLValue(&DRE).getAddress();
      }
      OMPPrivateScope VarScope(*this);
      VarScope.addPrivate(OrigVD,
                          [OrigAddr]() -> Address { return OrigAddr; });
      (void)VarScope.Privatize();
      EmitIgnoredExpr(F);
    }
    ++IC;
    ++IPC;
  }
  if (DoneBB)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}
static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
         "Expected simd directive");
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }
  //
  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }
  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());
  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }
  CGF.EmitOMPSimdInit(S);
  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    CGF.EmitOMPSimdFinal(
        S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
    // Emit the final copy of the lastprivate variables at the end of the loop.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(
        CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  }
  CGF.EmitOMPLinearClauseFinal(
      S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
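// NOTE (editorial, illustrative sketch; not part of the original source):
// for '#pragma omp simd', emitOMPSimdRegion above produces, conceptually:
//
//   if (<precondition>) {                     // simd.if.then / simd.if.end
//     <privatized counters, linears, privates, reductions>;
//     for (IV = 0; IV < LastIteration; ++IV)  // annotated for vectorization
//       BODY;
//     <final updates of counters, linear and lastprivate variables>;
//   }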
void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  auto &RT = CGM.getOpenMPRuntime();
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));
  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB), or
    // UB = min(UB, PrevUB) for combined loop-sharing constructs (e.g.
    // 'distribute parallel for').
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }
  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");
  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);
  // Emit "IV = LB" (in the case of a static schedule, we have already
  // calculated the new LB for the loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);
  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
  // Generate !llvm.loop.parallel metadata for loads and stores for loops
  // with dynamic/guided scheduling and without an ordered clause.
  if (!isOpenMPSimdDirective(S.getDirectiveKind()))
    LoopStack.setParallel(!IsMonotonic);
  else
    EmitOMPSimdInit(S, IsMonotonic);
  SourceLocation Loc = S.getLocStart();
  // When 'distribute' is not combined with a 'for':
  //   while (idx <= UB) { BODY; ++idx; }
  // When 'distribute' is combined with a 'for'
  // (e.g. 'distribute parallel for'):
  //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
  EmitOMPInnerLoop(
      S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
      [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
        CodeGenLoop(CGF, S, LoopExit);
      },
      [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
        CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
      });
  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());
  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
  };
  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
}
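// NOTE (editorial, illustrative sketch; not part of the original source):
// for a dynamic schedule, the outer loop emitted above is driven by the
// runtime, conceptually (assuming the libomp dispatch entry points):
//
//   while (__kmpc_dispatch_next_4(..., &last, &LB, &UB, &ST)) { // emitForNext
//     IV = LB;
//     while (IV <= UB) { BODY; ++IV; }                          // inner loop
//   }
//
// while static chunked schedules advance LB/UB locally (NextLB/NextUB) and
// branch back without re-entering the runtime.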
void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  auto &RT = CGM.getOpenMPRuntime();
  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered =
      Ordered || RT.isDynamic(ScheduleKind.Schedule);
  assert((Ordered ||
          !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");
  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  if (DynamicOrOrdered) {
    auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);
  }
  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };
  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}
void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {
  auto &RT = CGM.getOpenMPRuntime();
  // Emit outer loop.
  // Same behavior as EmitOMPForOuterLoop, except that the schedule cannot be
  // dynamic.
  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /*Ordered=*/false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
  // For combined 'distribute' and 'for', the increment expression of the
  // distribute loop is stored in DistInc. For 'distribute' alone, it is in
  // Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();
  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();
  EmitOMPOuterLoop(/*DynamicOrOrdered=*/false, /*IsMonotonic=*/false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}
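// NOTE (editorial, illustrative example; not part of the original source):
// EmitOMPDistributeOuterLoop above handles '#pragma omp distribute', which
// statically partitions the iteration space across the teams of a league;
// for combined forms such as 'distribute parallel for', the Combined*
// expressions selected above feed each team's [LB, UB] chunk into the inner
// 'parallel for' worksharing loop.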
/// Emit a helper variable and return the corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}
static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  // When composing 'distribute' with 'for' (e.g. as in 'distribute parallel
  // for') we need to use the 'distribute' chunk lower and upper bounds rather
  // than the whole loop iteration space. These are parameters to the outlined
  // function for 'parallel', and we copy the bounds of the previous schedule
  // into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(), SourceLocation());
  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);
  return {LB, UB};
}
/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop is not
  // normalized, as each team only executes its own assigned distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
                                            SourceLocation());
  llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
                                            SourceLocation());
  return {LBVal, UBVal};
}
static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  auto LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
  auto UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}
static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
                                         PrePostActionTy &) {
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                     HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };
  emitCommonOMPParallelDirective(
      CGF, S,
      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}
void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit the SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit the target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}
void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
    const OMPTargetTeamsDistributeParallelForDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}
void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}
namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace
bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);
  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }
  auto &RT = CGM.getOpenMPRuntime();
  bool HasLastprivateClause;
  // Check the pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }
    bool Ordered = false;
    if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S);
      else
        Ordered = true;
    }
    llvm::DenseSet<const Expr *> EmittedFinals;
    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.
    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
    // Emit 'then' code.
    {
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit an implicit barrier to synchronize threads and avoid data
        // races on the initialization of firstprivate variables and the
        // post-update of lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      // Detect the loop schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
                                /*Chunked=*/Chunk != nullptr) &&
          !Ordered) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided
        // into chunks that are approximately equal in size, and at most one
        // chunk is distributed to each thread. Note that the size of the
        // chunks is unspecified in this case.
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
            UB.getAddress(), ST.getAddress());
        RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                             ScheduleKind, StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S, LoopExit](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, LoopExit);
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                         S.getDirectiveKind());
        };
        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
      } else {
        const bool IsMonotonic =
            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // the runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                             ST.getAddress(), IL.getAddress(),
                                             Chunk, EUB);
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S,
                         [&](CodeGenFunction &CGF) -> llvm::Value * {
                           return CGF.Builder.CreateIsNotNull(
                               CGF.EmitLoadOfScalar(IL, S.getLocStart()));
                         });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit the final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(S.getDirectiveKind()),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
    }
    EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getLocStart()));
    });
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
  return HasLastprivateClause;
}

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}
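
// Illustrative note (not part of the upstream source): for a dispatch
// schedule the runtime hands out chunks from the whole logical iteration
// space, so a loop such as
//
//   #pragma omp for schedule(dynamic, 4)
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// is initialized with the constant bounds [0, N-1] produced by
// emitDispatchForLoopBounds, roughly
//
//   __kmpc_dispatch_init_4(&Loc, Tid, kmp_sch_dynamic_chunked,
//                          /*LB=*/0, /*UB=*/N - 1, /*ST=*/1, /*Chunk=*/4);
//
// whereas the static path uses the LB/UB helper variables produced by
// emitForLoopBounds and clamps UB against the global upper bound.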

void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}
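
// Illustrative sketch (not part of the upstream source; names such as Loc,
// Tid and Body are placeholders): for a statically scheduled loop
//
//   #pragma omp for
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// the code emitted above corresponds approximately to
//
//   __kmpc_for_static_init_4(&Loc, Tid, kmp_sch_static, &Last, &LB, &UB, &ST,
//                            /*Incr=*/1, /*Chunk=*/1);
//   UB = min(UB, N - 1);   // S.getEnsureUpperBound()
//   for (IV = LB; IV <= UB; ++IV)
//     Body(IV);
//   __kmpc_for_static_fini(&Loc, Tid);
//   __kmpc_barrier(&Loc, Tid); // omitted when 'nowait' is present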

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                 emitForLoopBounds,
                                                 emitDispatchForLoopBounds);
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
  }
}

static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
  auto *CS = dyn_cast<CompoundStmt>(Stmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    auto &C = CGF.CGM.getContext();
    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
                                      : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
                        OK_Ordinary, S.getLocStart(), FPOptions());
    // Increment for loop counter.
    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
                      S.getLocStart());
    auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      // case 0:
      //   <SectionStmt[0]>;
      //   break;
      // ...
      // case <NumSection> - 1:
      //   <SectionStmt[<NumSection> - 1]>;
      //   break;
      // }
      // .omp.sections.exit:
      auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      auto *SwitchStmt = CGF.Builder.CreateSwitch(
          CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
          CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (auto *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(Stmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
    auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
                                                     S.getDirectiveKind());
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(
        CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart())));
  };
  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_unknown);
  }
}
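
// Illustrative sketch (not part of the upstream source): BodyGen above turns
//
//   #pragma omp sections
//   {
//     Foo();
//     #pragma omp section
//     Bar();
//   }
//
// into a statically scheduled loop over the section indices, roughly
//
//   for (IV = LB; IV <= UB; ++IV)
//     switch (IV) {
//     case 0: Foo(); break;
//     case 1: Bar(); break;
//     }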

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                           OMPD_sections);
  }
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
                                              S.hasCancel());
}

void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination>=<source> expressions)
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  {
    OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data race on firstprivate
  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getLocStart(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
}
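
// Illustrative sketch (not part of the upstream source): the emitted 'single'
// region is guarded by the runtime, and 'copyprivate' broadcasts the values
// assigned by the executing thread, approximately
//
//   if (__kmpc_single(&Loc, Tid)) {
//     <single region>
//     DidIt = 1;
//     __kmpc_end_single(&Loc, Tid);
//   }
//   __kmpc_copyprivate(&Loc, Tid, CpySize, CpyData, CpyFunc, DidIt);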

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  Expr *Hint = nullptr;
  if (auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getLocStart(), Hint);
}
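
// Illustrative sketch (not part of the upstream source): the two regions
// above are bracketed by runtime entry points, approximately
//
//   if (__kmpc_master(&Loc, Tid)) {          // 'master'
//     <master region>
//     __kmpc_end_master(&Loc, Tid);
//   }
//
//   __kmpc_critical(&Loc, Tid, &Lock);       // 'critical'; with a 'hint'
//   <critical region>                        // clause this becomes
//   __kmpc_end_critical(&Loc, Tid, &Lock);   // __kmpc_critical_with_hint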

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitSections(S);
  };
  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                 emitEmptyBoundParameters);
}
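
// Illustrative note (not part of the upstream source): a combined directive
// such as
//
//   #pragma omp parallel for
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// outlines the worksharing loop into a function F and forks the team with,
// approximately, __kmpc_fork_call(&Loc, NumCapturedArgs, &F, ...); the 'for'
// part then runs inside F, so no separate barrier is needed between the two
// implicit directives.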

void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
                                                const RegionCodeGenTy &BodyGen,
                                                const TaskGenTy &TaskGen,
                                                OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto *I = CS->getCapturedDecl()->param_begin();
  auto *PartId = std::next(I);
  auto *TaskT = std::next(I, 4);
  // Check if the task is final
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    auto *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    auto *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >=0 for untied task).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (auto *IInit : C->private_copies()) {
      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
           cast<DeclRefExpr>(*IRef)});
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ILHS = C->lhs_exprs().begin();
    auto IRHS = C->rhs_exprs().begin();
    for (const auto *Ref : C->varlists()) {
      Data.ReductionVars.emplace_back(Ref);
      Data.ReductionCopies.emplace_back(*IPriv);
      Data.ReductionOps.emplace_back(*IRed);
      LHSs.emplace_back(*ILHS);
      RHSs.emplace_back(*IRHS);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ILHS, 1);
      std::advance(IRHS, 1);
    }
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getLocStart(), LHSs, RHSs, Data);
  // Build list of dependences.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>())
    for (auto *IRef : C->varlists())
      Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      auto *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      auto *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      CallArgs.push_back(PrivatesPtr);
      for (auto *E : Data.PrivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.FirstprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      for (auto *E : Data.LastprivateVars) {
        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        Address PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
        CallArgs.push_back(PrivatePtr.getPointer());
      }
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                          CopyFn, CallArgs);
      for (auto &&Pair : LastprivateDstsOrigs) {
        auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(
            const_cast<VarDecl *>(OrigVD),
            /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
                OrigVD) != nullptr,
            Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
          return CGF.EmitLValue(&DRE).getAddress();
        });
      }
      for (auto &&Pair : PrivatePtrs) {
        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
                            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
      }
    }
    if (Data.Reductions) {
      OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
                             Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement =
            Address(CGF.EmitScalarConversion(
                        Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                        CGF.getContext().getPointerType(
                            Data.ReductionCopies[Cnt]->getType()),
                        SourceLocation()),
                    Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                         [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const auto *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(
            CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation());
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.getPointer(), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                SourceLocation()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                              [Replacement]() { return Replacement; });
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
                                                           RedCG, Cnt);
      }
    }
    (void)InRedScope.Privatize();
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S);
  TaskGen(*this, OutlinedFn, Data);
}
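
// Illustrative sketch (not part of the upstream source): TaskGen usually ends
// up in CGOpenMPRuntime::emitTaskCall, so
//
//   #pragma omp task firstprivate(X)
//   Work(X);
//
// is lowered approximately to
//
//   kmp_task_t *T = __kmpc_omp_task_alloc(&Loc, Tid, Flags, SizeOfTask,
//                                         SizeOfShareds, &TaskEntry);
//   <copy X into T's privates via the generated copy function>
//   __kmpc_omp_task(&Loc, Tid, T);
//
// where TaskEntry is the function produced by emitTaskOutlinedFunction above.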

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }
  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
}

void CodeGenFunction::EmitOMPTaskyieldDirective(
    const OMPTaskyieldDirective &S) {
  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
}

void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
}

void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
}
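
// Illustrative note (not part of the upstream source): the three stand-alone
// directives above map onto single runtime calls, approximately
// __kmpc_omp_taskyield(&Loc, Tid, /*EndPart=*/0), __kmpc_barrier(&Loc, Tid)
// and __kmpc_omp_taskwait(&Loc, Tid).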

void CodeGenFunction::EmitOMPTaskgroupDirective(
    const OMPTaskgroupDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    if (const Expr *E = S.getReductionRef()) {
      SmallVector<const Expr *, 4> LHSs;
      SmallVector<const Expr *, 4> RHSs;
      OMPTaskDataTy Data;
      for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
        auto IPriv = C->privates().begin();
        auto IRed = C->reduction_ops().begin();
        auto ILHS = C->lhs_exprs().begin();
        auto IRHS = C->rhs_exprs().begin();
        for (const auto *Ref : C->varlists()) {
          Data.ReductionVars.emplace_back(Ref);
          Data.ReductionCopies.emplace_back(*IPriv);
          Data.ReductionOps.emplace_back(*IRed);
          LHSs.emplace_back(*ILHS);
          RHSs.emplace_back(*IRHS);
          std::advance(IPriv, 1);
          std::advance(IRed, 1);
          std::advance(ILHS, 1);
          std::advance(IRHS, 1);
        }
      }
      llvm::Value *ReductionDesc =
          CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
                                                           LHSs, RHSs, Data);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
      CGF.EmitVarDecl(*VD);
      CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD),
                            /*Volatile=*/false, E->getType());
    }
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
}
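
// Illustrative sketch (not part of the upstream source): the taskgroup region
// is bracketed by runtime calls, approximately
//
//   __kmpc_taskgroup(&Loc, Tid);
//   <taskgroup region, plus task_reduction setup when the clause is present>
//   __kmpc_end_taskgroup(&Loc, Tid);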

void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
    if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
      return llvm::makeArrayRef(FlushClause->varlist_begin(),
                                FlushClause->varlist_end());
    }
    return llvm::None;
  }(), S.getLocStart());
}
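
// Illustrative note (not part of the upstream source): both '#pragma omp
// flush' and '#pragma omp flush(A, B)' currently reach the same runtime entry
// point, approximately __kmpc_flush(&Loc); the optional variable list only
// affects what is handed to emitFlush.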

void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                            const CodeGenLoopTy &CodeGenLoop,
                                            Expr *IncExpr) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);
  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on
  // each iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }
  auto &RT = CGM.getOpenMPRuntime();
  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }
    emitAlignedClause(*this, S);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedLowerBoundVariable()
                          : S.getLowerBoundVariable())));
      LValue UB = EmitOMPHelperVar(
          *this, cast<DeclRefExpr>(
                     (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                          ? S.getCombinedUpperBoundVariable()
                          : S.getUpperBoundVariable())));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope)) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
          !isOpenMPParallelDirective(S.getDirectiveKind()) &&
          !isOpenMPTeamsDirective(S.getDirectiveKind()))
        EmitOMPReductionClauseInit(S, LoopScope);
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      // Detect the distribute schedule kind and chunk.
      llvm::Value *Chunk = nullptr;
      OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
      if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
        ScheduleKind = C->getDistScheduleKind();
        if (const auto *Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType(),
                                       S.getLocStart());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP [2.10.8, distribute Construct, Description]
      // If dist_schedule is specified, kind must be static. If specified,
      // iterations are divided into chunks of size chunk_size, chunks are
      // assigned to the teams of the league in a round-robin fashion in the
      // order of the team number. When no chunk_size is specified, the
      // iteration space is divided into chunks that are approximately equal
      // in size, and at most one chunk is distributed to each team of the
      // league. The size of the chunks is unspecified in this case.
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        if (isOpenMPSimdDirective(S.getDirectiveKind()))
          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
        CGOpenMPRuntime::StaticRTInput StaticInit(
            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
            LB.getAddress(), UB.getAddress(), ST.getAddress());
        RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
                                    StaticInit);
        auto LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedEnsureUpperBound()
                            : S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                            ? S.getCombinedInit()
                            : S.getInit());
        Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
                         ? S.getCombinedCond()
                         : S.getCond();
        // for distribute alone, codegen
        // while (idx <= UB) { BODY; ++idx; }
        // when combined with 'for' (e.g. as in 'distribute parallel for')
        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                           CodeGenLoop(CGF, S, LoopExit);
                         },
                         [](CodeGenFunction &) {});
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        const OMPLoopArguments LoopArguments = {
            LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
            Chunk};
        EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
                                   CodeGenLoop);
      }
      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getLocStart()));
        });
      }
      OpenMPDirectiveKind ReductionKind = OMPD_unknown;
      if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
          isOpenMPSimdDirective(S.getDirectiveKind())) {
        ReductionKind = OMPD_parallel_for_simd;
      } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
        ReductionKind = OMPD_parallel_for;
      } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
        ReductionKind = OMPD_simd;
      } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
                 S.hasClausesOfKind<OMPReductionClause>()) {
        llvm_unreachable(
            "No reduction clauses are allowed in distribute directive.");
      }
      EmitOMPReductionClauseFinal(S, ReductionKind);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getLocStart()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause) {
        EmitOMPLastprivateClauseFinal(
            S, /*NoFinals=*/false,
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, true);
    }
  }
}
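
// Illustrative note (not part of the upstream source): for
//
//   #pragma omp distribute dist_schedule(static)
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// each team receives at most one contiguous chunk of [0, N-1] through the
// static initialization call above, and the inner while-loop walks that
// chunk; with dist_schedule(static, ChunkSize) the outer-loop path instead
// hands chunks to the teams round-robin.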

void CodeGenFunction::EmitOMPDistributeDirective(
    const OMPDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
                                                   const CapturedStmt *S) {
  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
  CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
  CGF.CapturedStmtInfo = &CapStmtInfo;
  auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
  Fn->addFnAttr(llvm::Attribute::NoInline);
  return Fn;
}

void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
  if (!S.getAssociatedStmt()) {
    for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
      CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
    return;
  }
  auto *C = S.getSingleClause<OMPSIMDClause>();
  auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
                                 PrePostActionTy &Action) {
    if (C) {
      auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
      llvm::SmallVector<llvm::Value *, 16> CapturedVars;
      CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
      auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
      CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
                                                      OutlinedFn, CapturedVars);
    } else {
      Action.Enter(CGF);
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    }
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
}
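
// Illustrative sketch (not part of the upstream source): without a 'simd'
// clause the region above is emitted approximately as
//
//   __kmpc_ordered(&Loc, Tid);
//   <ordered region>
//   __kmpc_end_ordered(&Loc, Tid);
//
// With 'ordered(simd)' the body is outlined into a NoInline function (see
// emitOutlinedOrderedFunction) and called directly, and the stand-alone
// 'ordered depend(...)' form is lowered to __kmpc_doacross_post/wait above.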

static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
                                         QualType SrcType, QualType DestType,
                                         SourceLocation Loc) {
  assert(CGF.hasScalarEvaluationKind(DestType) &&
         "DestType must have scalar evaluation kind.");
  assert(!Val.isAggregate() && "Must be a scalar or complex.");
  return Val.isScalar()
             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
                                        Loc)
             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
                                                 DestType, Loc);
}

static CodeGenFunction::ComplexPairTy
convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
                      QualType DestType, SourceLocation Loc) {
  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
         "DestType must have complex evaluation kind.");
  CodeGenFunction::ComplexPairTy ComplexVal;
  if (Val.isScalar()) {
    // Convert the input element to the element type of the complex.
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
                                              DestElementType, Loc);
    ComplexVal = CodeGenFunction::ComplexPairTy(
        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
  } else {
    assert(Val.isComplex() && "Must be a scalar or complex.");
    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
    ComplexVal.first = CGF.EmitScalarConversion(
        Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
    ComplexVal.second = CGF.EmitScalarConversion(
        Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
  }
  return ComplexVal;
}

static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
                                  LValue LVal, RValue RVal) {
  if (LVal.isGlobalReg()) {
    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
  } else {
    CGF.EmitAtomicStore(RVal, LVal,
                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                 : llvm::AtomicOrdering::Monotonic,
                        LVal.isVolatile(), /*IsInit=*/false);
  }
}

void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
                                         QualType RValTy, SourceLocation Loc) {
  switch (getEvaluationKind(LVal.getType())) {
  case TEK_Scalar:
    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
                               *this, RVal, RValTy, LVal.getType(), Loc)),
                           LVal);
    break;
  case TEK_Complex:
    EmitStoreOfComplex(
        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
        /*isInit=*/false);
    break;
  case TEK_Aggregate:
    llvm_unreachable("Must be a scalar or complex.");
  }
}

static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                  const Expr *X, const Expr *V,
                                  SourceLocation Loc) {
  // v = x;
  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  LValue VLValue = CGF.EmitLValue(V);
  RValue Res = XLValue.isGlobalReg()
                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
                   : CGF.EmitAtomicLoad(
                         XLValue, Loc,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
                         XLValue.isVolatile());
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
}

static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *E,
                                   SourceLocation Loc) {
  // x = expr;
  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
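
// Illustrative note (not part of the upstream source): for a scalar 'int X',
//
//   #pragma omp atomic read seq_cst
//   V = X;
//
// loads X with a sequentially consistent atomic load, emits an implicit
// __kmpc_flush(&Loc), and then stores the result to V non-atomically;
// 'atomic write' mirrors this with an atomic store of the RHS into X.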

static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
                                                RValue Update,
                                                BinaryOperatorKind BO,
                                                llvm::AtomicOrdering AO,
                                                bool IsXLHSInRHSPart) {
  auto &Context = CGF.CGM.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomic operations are supported for
  // the given type on the target platform.
  if (BO == BO_Comma || !Update.isScalar() ||
      !Update.getScalarVal()->getType()->isIntegerTy() ||
      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
                        (Update.getScalarVal()->getType() !=
                         X.getAddress().getElementType())) ||
      !X.getAddress().getElementType()->isIntegerTy() ||
      !Context.getTargetInfo().hasBuiltinAtomic(
          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
    return std::make_pair(false, RValue::get(nullptr));
  llvm::AtomicRMWInst::BinOp RMWOp;
  switch (BO) {
  case BO_Add:
    RMWOp = llvm::AtomicRMWInst::Add;
    break;
  case BO_Sub:
    if (!IsXLHSInRHSPart)
      return std::make_pair(false, RValue::get(nullptr));
    RMWOp = llvm::AtomicRMWInst::Sub;
    break;
  case BO_And:
    RMWOp = llvm::AtomicRMWInst::And;
    break;
  case BO_Or:
    RMWOp = llvm::AtomicRMWInst::Or;
    break;
  case BO_Xor:
    RMWOp = llvm::AtomicRMWInst::Xor;
    break;
  case BO_LT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
                                   : llvm::AtomicRMWInst::Max)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
                                   : llvm::AtomicRMWInst::UMax);
    break;
  case BO_GT:
    RMWOp = X.getType()->hasSignedIntegerRepresentation()
                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
                                   : llvm::AtomicRMWInst::Min)
                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                   : llvm::AtomicRMWInst::UMin);
    break;
  case BO_Assign:
    RMWOp = llvm::AtomicRMWInst::Xchg;
    break;
  case BO_Mul:
  case BO_Div:
  case BO_Rem:
  case BO_Shl:
  case BO_Shr:
  case BO_LAnd:
  case BO_LOr:
    return std::make_pair(false, RValue::get(nullptr));
  case BO_PtrMemD:
  case BO_PtrMemI:
  case BO_LE:
  case BO_GE:
  case BO_EQ:
  case BO_NE:
  case BO_AddAssign:
  case BO_SubAssign:
  case BO_AndAssign:
  case BO_OrAssign:
  case BO_XorAssign:
  case BO_MulAssign:
  case BO_DivAssign:
  case BO_RemAssign:
  case BO_ShlAssign:
  case BO_ShrAssign:
  case BO_Comma:
    llvm_unreachable("Unsupported atomic update operation");
  }
  auto *UpdateVal = Update.getScalarVal();
  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
    UpdateVal = CGF.Builder.CreateIntCast(
        IC, X.getAddress().getElementType(),
        X.getType()->hasSignedIntegerRepresentation());
  }
  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
  return std::make_pair(true, RValue::get(Res));
}
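
// Illustrative sketch (not part of the upstream source): when the fast path
// above applies, an update such as
//
//   #pragma omp atomic
//   X += Val;
//
// becomes a single instruction in the emitted IR, roughly
//
//   %old = atomicrmw add i32* %X.addr, i32 %Val monotonic
//
// while forms it rejects (e.g. 'X = Val - X', or a floating-point X) fall
// back to the compare-and-swap loop in EmitOMPAtomicSimpleUpdateExpr below.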

std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
    llvm::AtomicOrdering AO, SourceLocation Loc,
    const llvm::function_ref<RValue(RValue)> &CommonGen) {
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
  if (!Res.first) {
    if (X.isGlobalReg()) {
      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
      // 'xrval'.
      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
    } else {
      // Perform compare-and-swap procedure.
      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
    }
  }
  return Res;
}

static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                    const Expr *X, const Expr *E,
                                    const Expr *UE, bool IsXLHSInRHSPart,
                                    SourceLocation Loc) {
  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
         "Update expr in 'atomic update' must be a binary operator.");
  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
  // Update expressions are allowed to have the following forms:
  // x binop= expr; -> xrval binop expr;
  // x++, ++x -> xrval + 1;
  // x--, --x -> xrval - 1;
  // x = x binop expr; -> xrval binop expr;
  // x = expr Op x; -> expr binop xrval;
  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
  auto Gen =
      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
    CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
    CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
    return CGF.EmitAnyExpr(UE);
  };
  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}

static RValue convertToType(CodeGenFunction &CGF, RValue Value,
                            QualType SourceType, QualType ResType,
                            SourceLocation Loc) {
  switch (CGF.getEvaluationKind(ResType)) {
  case TEK_Scalar:
    return RValue::get(
        convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
  case TEK_Complex: {
    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
    return RValue::getComplex(Res.first, Res.second);
  }
  case TEK_Aggregate:
    break;
  }
  llvm_unreachable("Must be a scalar or complex.");
}

static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                     bool IsPostfixUpdate, const Expr *V,
                                     const Expr *X, const Expr *E,
                                     const Expr *UE, bool IsXLHSInRHSPart,
                                     SourceLocation Loc) {
  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
  RValue NewVVal;
  LValue VLValue = CGF.EmitLValue(V);
  LValue XLValue = CGF.EmitLValue(X);
  RValue ExprRValue = CGF.EmitAnyExpr(E);
  auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                     : llvm::AtomicOrdering::Monotonic;
  QualType NewVValType;
  if (UE) {
    // 'x' is updated with some additional value.
    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
           "Update expr in 'atomic capture' must be a binary operator.");
    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
    // Update expressions are allowed to have the following forms:
    // x binop= expr; -> xrval binop expr;
    // x++, ++x -> xrval + 1;
    // x--, --x -> xrval - 1;
    // x = x binop expr; -> xrval binop expr;
    // x = expr Op x; -> expr binop xrval;
    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
    NewVValType = XRValExpr->getType();
    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
                  IsPostfixUpdate](RValue XRValue) -> RValue {
      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
      RValue Res = CGF.EmitAnyExpr(UE);
      NewVVal = IsPostfixUpdate ? XRValue : Res;
      return Res;
    };
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      if (IsPostfixUpdate) {
        // Use old value from 'atomicrmw'.
        NewVVal = Res.second;
      } else {
        // 'atomicrmw' does not provide new value, so evaluate it using old
        // value of 'x'.
        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
        NewVVal = CGF.EmitAnyExpr(UE);
      }
    }
  } else {
    // 'x' is simply rewritten with some 'expr'.
    NewVValType = X->getType().getNonReferenceType();
    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
                               X->getType().getNonReferenceType(), Loc);
    auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
      NewVVal = XRValue;
      return ExprRValue;
    };
    // Try to perform atomicrmw xchg, otherwise simple exchange.
    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
        Loc, Gen);
    if (Res.first) {
      // 'atomicrmw' instruction was generated.
      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
    }
  }
  // Emit post-update store to 'v' of old/new 'x' value.
  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
  // OpenMP, 2.12.6, atomic Construct
  // Any atomic construct with a seq_cst clause forces the atomically
  // performed operation to include an implicit flush operation without a
  // list.
  if (IsSeqCst)
    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
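
// Illustrative examples (assumed user code, not part of this file):
//   #pragma omp atomic capture
//   v = x++;   // postfix: 'v' receives the old value of 'x'
//   #pragma omp atomic capture
//   v = ++x;   // prefix: 'v' receives the updated value of 'x'
// When there is no update expression ('x' is simply rewritten with 'expr'),
// the exchange path above is taken and 'v' receives either the old 'x' or
// 'expr', depending on IsPostfixUpdate.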
static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              bool IsSeqCst, bool IsPostfixUpdate,
                              const Expr *X, const Expr *V, const Expr *E,
                              const Expr *UE, bool IsXLHSInRHSPart,
                              SourceLocation Loc) {
  switch (Kind) {
  case OMPC_read:
    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
    break;
  case OMPC_write:
    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
    break;
  case OMPC_unknown:
  case OMPC_update:
    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
    break;
  case OMPC_capture:
    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
                             IsXLHSInRHSPart, Loc);
    break;
  case OMPC_if:
  case OMPC_final:
  case OMPC_num_threads:
  case OMPC_private:
  case OMPC_firstprivate:
  case OMPC_lastprivate:
  case OMPC_reduction:
  case OMPC_task_reduction:
  case OMPC_in_reduction:
  case OMPC_safelen:
  case OMPC_simdlen:
  case OMPC_collapse:
  case OMPC_default:
  case OMPC_seq_cst:
  case OMPC_shared:
  case OMPC_linear:
  case OMPC_aligned:
  case OMPC_copyin:
  case OMPC_copyprivate:
  case OMPC_flush:
  case OMPC_proc_bind:
  case OMPC_schedule:
  case OMPC_ordered:
  case OMPC_nowait:
  case OMPC_untied:
  case OMPC_threadprivate:
  case OMPC_depend:
  case OMPC_mergeable:
  case OMPC_device:
  case OMPC_threads:
  case OMPC_simd:
  case OMPC_map:
  case OMPC_num_teams:
  case OMPC_thread_limit:
  case OMPC_priority:
  case OMPC_grainsize:
  case OMPC_nogroup:
  case OMPC_num_tasks:
  case OMPC_hint:
  case OMPC_dist_schedule:
  case OMPC_defaultmap:
  case OMPC_uniform:
  case OMPC_to:
  case OMPC_from:
  case OMPC_use_device_ptr:
  case OMPC_is_device_ptr:
    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
  }
}
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
  OpenMPClauseKind Kind = OMPC_unknown;
  for (auto *C : S.clauses()) {
    // Find first clause (skip seq_cst clause, if it is first).
    if (C->getClauseKind() != OMPC_seq_cst) {
      Kind = C->getClauseKind();
      break;
    }
  }
  const auto *CS =
      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
    enterFullExpression(EWC);
  }
  // Processing for statements under 'atomic capture'.
  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
    for (const auto *C : Compound->body()) {
      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
        enterFullExpression(EWC);
      }
    }
  }
  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.EmitStopPoint(CS);
    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
                      S.getV(), S.getExpr(), S.getUpdateExpr(),
                      S.isXLHSInRHSPart(), S.getLocStart());
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
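
// For example (illustrative): '#pragma omp atomic capture seq_cst' yields
// Kind == OMPC_capture with IsSeqCst == true, while a bare
// '#pragma omp atomic' leaves Kind == OMPC_unknown, which the switch in
// EmitOMPAtomicExpr treats as an update.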
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen) {
  assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
  CodeGenModule &CGM = CGF.CGM;
  const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target);
  llvm::Function *Fn = nullptr;
  llvm::Constant *FnID = nullptr;
  const Expr *IfCond = nullptr;
  // Check for at most one 'if' clause associated with the target region.
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_target) {
      IfCond = C->getCondition();
      break;
    }
  }
  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
    Device = C->getDevice();
  }
  // Check if we have an if clause whose conditional always evaluates to false
  // or if we do not have any targets specified. If so the target region is not
  // an offload entry point.
  bool IsOffloadEntry = true;
  if (IfCond) {
    bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
      IsOffloadEntry = false;
  }
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    IsOffloadEntry = false;
  assert(CGF.CurFuncDecl && "No parent declaration for target region!");
  StringRef ParentName;
  // In case we have Ctors/Dtors we use the complete type variant to produce
  // the mangling of the device outlined kernel.
  if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
  OMPLexicalScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        CapturedVars);
}
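
// Illustrative example (assumed user code, not part of this file):
//   #pragma omp target if(n > 1024) device(dev_id)
//   { ... }
// The region body is outlined once for the device; at the call site the
// emitted code consults the 'if' condition and 'device' expression to decide
// between offloading and the host fallback. An 'if' clause that constant
// folds to false, or an empty list of offload targets, means no offload
// entry is registered for the region.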
static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
                             PrePostActionTy &Action) {
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
  CGF.EmitOMPPrivateClause(S, PrivateScope);
  (void)PrivateScope.Privatize();
  Action.Enter(CGF);
  CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
}

void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
                                                  StringRef ParentName,
                                                  const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        OpenMPDirectiveKind InnermostKind,
                                        const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
  const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
  const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
  if (NT || TL) {
    Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
    Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getLocStart());
  }
  OMPTeamsScope Scope(CGF, S);
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
                                           CapturedVars);
}
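
// Illustrative example (assumed user code, not part of this file):
//   #pragma omp teams num_teams(4) thread_limit(64)
// Both clause expressions, when present, are forwarded to the runtime via
// emitNumTeamsClause before the outlined teams function is invoked; an
// absent clause is passed as a null expression.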
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(
      *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                  const OMPTargetTeamsDirective &S) {
  auto *CS = S.getCapturedStmt(OMPD_teams);
  Action.Enter(CGF);
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    Action.Enter(CGF);
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDirective(
    const OMPTargetTeamsDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void
emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
                                const OMPTargetTeamsDistributeDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
    const OMPTargetTeamsDistributeDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void emitTargetTeamsDistributeSimdRegion(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  Action.Enter(CGF);
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
    const OMPTargetTeamsDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPTeamsDistributeDirective(
    const OMPTeamsDistributeDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
    const OMPTeamsDistributeSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
    const OMPTeamsDistributeParallelForDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
    const OMPTeamsDistributeParallelForSimdDirective &S) {
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}
void CodeGenFunction::EmitOMPCancellationPointDirective(
    const OMPCancellationPointDirective &S) {
  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
                                                   S.getCancelRegion());
}

void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_cancel) {
      IfCond = C->getCondition();
      break;
    }
  }
  CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
                                        S.getCancelRegion());
}

CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
  if (Kind == OMPD_parallel || Kind == OMPD_task ||
      Kind == OMPD_target_parallel)
    return ReturnBlock;
  assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
         Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
         Kind == OMPD_distribute_parallel_for ||
         Kind == OMPD_target_parallel_for ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for);
  return OMPCancelStack.getExitBlock();
}
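
// For example (illustrative): '#pragma omp cancel parallel' inside a parallel
// region branches to the enclosing function's return block, whereas
// '#pragma omp cancel for' inside a worksharing loop branches to the exit
// block tracked by OMPCancelStack so the construct can still be finalized.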
void CodeGenFunction::EmitOMPUseDevicePtrClause(
    const OMPClause &NC, OMPPrivateScope &PrivateScope,
    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
  const auto &C = cast<OMPUseDevicePtrClause>(NC);
  auto OrigVarIt = C.varlist_begin();
  auto InitIt = C.inits().begin();
  for (auto PvtVarIt : C.private_copies()) {
    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
    auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
    auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
    // In order to identify the right initializer we need to match the
    // declaration used by the mapping logic. In some cases we may get
    // OMPCapturedExprDecl that refers to the original declaration.
    const ValueDecl *MatchingVD = OrigVD;
    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecl are used to privatize fields of the current
      // structure.
      auto *ME = cast<MemberExpr>(OED->getInit());
      assert(isa<CXXThisExpr>(ME->getBase()) &&
             "Base should be the current struct!");
      MatchingVD = ME->getMemberDecl();
    }
    // If we don't have information about the current list item, move on to
    // the next one.
    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
    if (InitAddrIt == CaptureDeviceAddrMap.end())
      continue;
    bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
      // Initialize the temporary initialization variable with the address we
      // get from the runtime library. We have to cast the source address
      // because it is always a void *. References are materialized in the
      // privatization scope, so the initialization here disregards the fact
      // the original variable is a reference.
      QualType AddrQTy =
          getContext().getPointerType(OrigVD->getType().getNonReferenceType());
      llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
      Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
      setAddrOfLocalVar(InitVD, InitAddr);
      // Emit the private declaration; it will be initialized by the
      // declaration we just added to the local declarations map.
      EmitDecl(*PvtVD);
      // The initialization variable served its purpose in the emission of the
      // previous declaration, so we don't need it anymore.
      LocalDeclMap.erase(InitVD);
      // Return the address of the private variable.
      return GetAddrOfLocalVar(PvtVD);
    });
    assert(IsRegistered && "firstprivate var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;
    ++OrigVarIt;
    ++InitIt;
  }
}
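
// Illustrative example (assumed user code, not part of this file):
//   #pragma omp target data map(to: a[0:n]) use_device_ptr(a)
// Inside the region, 'a' is privatized to hold the device address obtained
// from the runtime (looked up through CaptureDeviceAddrMap) instead of its
// host value.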
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
    const OMPTargetDataDirective &S) {
  CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
  // Create a pre/post action to signal the privatization of the device
  // pointer. This action can be replaced by the OpenMP runtime code generation
  // to deactivate privatization.
  bool PrivatizeDevicePointers = false;
  class DevicePointerPrivActionTy : public PrePostActionTy {
    bool &PrivatizeDevicePointers;

  public:
    explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
        : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
    void Enter(CodeGenFunction &CGF) override {
      PrivatizeDevicePointers = true;
    }
  };
  DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
  auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
      CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(
          cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
    };
    // Codegen that selects whether to generate the privatization code or not.
    auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
                          &InnermostCodeGen](CodeGenFunction &CGF,
                                             PrePostActionTy &Action) {
      RegionCodeGenTy RCG(InnermostCodeGen);
      PrivatizeDevicePointers = false;
      // Call the pre-action to change the status of PrivatizeDevicePointers if
      // needed.
      Action.Enter(CGF);
      if (PrivatizeDevicePointers) {
        OMPPrivateScope PrivateScope(CGF);
        // Emit all instances of the use_device_ptr clause.
        for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
          CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                        Info.CaptureDeviceAddrMap);
        (void)PrivateScope.Privatize();
        RCG(CGF);
      } else
        RCG(CGF);
    };
    // Forward the provided action to the privatization codegen.
    RegionCodeGenTy PrivRCG(PrivCodeGen);
    PrivRCG.setAction(Action);
    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
    OMPLexicalScope Scope(CGF, S);
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
                                                    PrivRCG);
  };
  RegionCodeGenTy RCG(CodeGen);
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty()) {
    RCG(*this);
    return;
  }
  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();
  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();
  // Set the action to signal privatization of device pointers.
  RCG.setAction(PrivAction);
  // Emit region code.
  CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
                                             Info);
}
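
// Illustrative note: when no offload targets are configured, the region body
// above is emitted directly with no mapping calls; otherwise
// emitTargetDataCalls brackets the body with the data-mapping runtime calls,
// and PrivAction determines whether use_device_ptr privatization is needed.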
void CodeGenFunction::EmitOMPTargetEnterDataDirective(
    const OMPTargetEnterDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;
  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();
  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();
  auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
                                        PrePostActionTy &) {
    CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
                                                            Device);
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data,
                                              CodeGen);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
    const OMPTargetExitDataDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;
  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();
  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();
  auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
                                        PrePostActionTy &) {
    CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
                                                            Device);
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data,
                                              CodeGen);
}
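
// Illustrative examples (assumed user code, not part of this file):
//   #pragma omp target enter data map(to: a[0:n]) if(cond) device(dev_id)
//   #pragma omp target exit data map(from: a[0:n])
// Both are standalone directives: no region is outlined, and the clauses are
// lowered to a single data-mapping runtime call through
// emitTargetDataStandAloneCall.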
static void emitTargetParallelRegion(CodeGenFunction &CGF,
                                     const OMPTargetParallelDirective &S,
                                     PrePostActionTy &Action) {
  // Get the captured statement associated with the 'parallel' region.
  auto *CS = S.getCapturedStmt(OMPD_parallel);
  Action.Enter(CGF);
  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
                                 emitEmptyBoundParameters);
  emitPostUpdateForReductionClause(
      CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelDirective(
    const OMPTargetParallelDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
static void emitTargetParallelForRegion(CodeGenFunction &CGF,
                                        const OMPTargetParallelForDirective &S,
                                        PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForDirective(
    const OMPTargetParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

static void
emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
                                const OMPTargetParallelForSimdDirective &S,
                                PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelForSimdDirective &S) {
  // Emit SPMD target parallel for region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
    const OMPTargetParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelForSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
/// Map an OpenMP loop helper variable to the address of the corresponding
/// implicit parameter of the outlined task function.
static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
                     const ImplicitParamDecl *PVD,
                     CodeGenFunction::OMPPrivateScope &Privates) {
  auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(
      VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
}
void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
  assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
  // Emit outlined function for task construct.
  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_taskloop) {
      IfCond = C->getCondition();
      break;
    }
  }
  OMPTaskDataTy Data;
  // Check if taskloop must be emitted without taskgroup.
  Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
  // TODO: Check if we should emit tied or untied task.
  Data.Tied = true;
  // Set scheduling for taskloop.
  if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
    // grainsize clause
    Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
  } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
    // num_tasks clause
    Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
  }
  auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
    // }
    //
    // Emit: if (PreCond) - begin.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return;
    } else {
      auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
      CGF.incrementProfileCounter(&S);
    }
    if (isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPSimdInit(S);
    OMPPrivateScope LoopScope(CGF);
    // Emit helper vars inits.
    enum { LowerBound = 5, UpperBound, Stride, LastIter };
    auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    // Emit the loop iteration variable.
    const Expr *IVExpr = S.getIterationVariable();
    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());
    // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate iterations count on
    // each iteration (e.g., it is foldable into a constant).
    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
    }
    CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                         S.getInc(),
                         [&S](CodeGenFunction &CGF) {
                           CGF.EmitOMPLoopBody(S, JumpDest());
                           CGF.EmitStopPoint(&S);
                         },
                         [](CodeGenFunction &) {});
    // Emit: if (PreCond) - end.
    if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, true);
    }
    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause) {
      CGF.EmitOMPLastprivateClauseFinal(
          S, isOpenMPSimdDirective(S.getDirectiveKind()),
          CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
              CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
              (*LIP)->getType(), S.getLocStart())));
    }
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
      OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
    };
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
                                                    CodeGen);
  };
  if (Data.Nogroup)
    EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
  else {
    CGM.getOpenMPRuntime().emitTaskgroupRegion(
        *this,
        [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                        PrePostActionTy &Action) {
          Action.Enter(CGF);
          CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
        },
        S.getLocStart());
  }
}
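
// Illustrative example (assumed user code, not part of this file):
//   #pragma omp taskloop grainsize(g) nogroup
//   for (int i = 0; i < n; ++i) { ... }
// 'grainsize' and 'num_tasks' share Data.Schedule: the int flag distinguishes
// them (false for grainsize, true for num_tasks). Without 'nogroup', the
// generated task is additionally wrapped in an implicit taskgroup region.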
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  EmitOMPTaskLoopBasedDirective(S);
}

// Generate the instructions for '#pragma omp target update' directive.
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;
  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();
  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();
  auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
                                        PrePostActionTy &) {
    CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
                                                            Device);
  };
  OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update,
                                              CodeGen);
}