UnwrappedLineParser.cpp 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766
  1. //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file contains the implementation of the UnwrappedLineParser,
  11. /// which turns a stream of tokens into UnwrappedLines.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #include "UnwrappedLineParser.h"
  15. #include "llvm/ADT/STLExtras.h"
  16. #include "llvm/Support/Debug.h"
  17. #include "llvm/Support/raw_ostream.h"
  18. #include <algorithm>
  19. #define DEBUG_TYPE "format-parser"
  20. namespace clang {
  21. namespace format {
  22. class FormatTokenSource {
  23. public:
  24. virtual ~FormatTokenSource() {}
  25. virtual FormatToken *getNextToken() = 0;
  26. virtual unsigned getPosition() = 0;
  27. virtual FormatToken *setPosition(unsigned Position) = 0;
  28. };
  29. namespace {
  30. class ScopedDeclarationState {
  31. public:
  32. ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  33. bool MustBeDeclaration)
  34. : Line(Line), Stack(Stack) {
  35. Line.MustBeDeclaration = MustBeDeclaration;
  36. Stack.push_back(MustBeDeclaration);
  37. }
  38. ~ScopedDeclarationState() {
  39. Stack.pop_back();
  40. if (!Stack.empty())
  41. Line.MustBeDeclaration = Stack.back();
  42. else
  43. Line.MustBeDeclaration = true;
  44. }
  45. private:
  46. UnwrappedLine &Line;
  47. std::vector<bool> &Stack;
  48. };
  49. static bool isLineComment(const FormatToken &FormatTok) {
  50. return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
  51. }
  52. // Checks if \p FormatTok is a line comment that continues the line comment
  53. // \p Previous. The original column of \p MinColumnToken is used to determine
  54. // whether \p FormatTok is indented enough to the right to continue \p Previous.
  55. static bool continuesLineComment(const FormatToken &FormatTok,
  56. const FormatToken *Previous,
  57. const FormatToken *MinColumnToken) {
  58. if (!Previous || !MinColumnToken)
  59. return false;
  60. unsigned MinContinueColumn =
  61. MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  62. return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
  63. isLineComment(*Previous) &&
  64. FormatTok.OriginalColumn >= MinContinueColumn;
  65. }
  66. class ScopedMacroState : public FormatTokenSource {
  67. public:
  68. ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  69. FormatToken *&ResetToken)
  70. : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  71. PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  72. Token(nullptr), PreviousToken(nullptr) {
  73. FakeEOF.Tok.startToken();
  74. FakeEOF.Tok.setKind(tok::eof);
  75. TokenSource = this;
  76. Line.Level = 0;
  77. Line.InPPDirective = true;
  78. }
  79. ~ScopedMacroState() override {
  80. TokenSource = PreviousTokenSource;
  81. ResetToken = Token;
  82. Line.InPPDirective = false;
  83. Line.Level = PreviousLineLevel;
  84. }
  85. FormatToken *getNextToken() override {
  86. // The \c UnwrappedLineParser guards against this by never calling
  87. // \c getNextToken() after it has encountered the first eof token.
  88. assert(!eof());
  89. PreviousToken = Token;
  90. Token = PreviousTokenSource->getNextToken();
  91. if (eof())
  92. return &FakeEOF;
  93. return Token;
  94. }
  95. unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  96. FormatToken *setPosition(unsigned Position) override {
  97. PreviousToken = nullptr;
  98. Token = PreviousTokenSource->setPosition(Position);
  99. return Token;
  100. }
  101. private:
  102. bool eof() {
  103. return Token && Token->HasUnescapedNewline &&
  104. !continuesLineComment(*Token, PreviousToken,
  105. /*MinColumnToken=*/PreviousToken);
  106. }
  107. FormatToken FakeEOF;
  108. UnwrappedLine &Line;
  109. FormatTokenSource *&TokenSource;
  110. FormatToken *&ResetToken;
  111. unsigned PreviousLineLevel;
  112. FormatTokenSource *PreviousTokenSource;
  113. FormatToken *Token;
  114. FormatToken *PreviousToken;
  115. };
  116. } // end anonymous namespace
  117. class ScopedLineState {
  118. public:
  119. ScopedLineState(UnwrappedLineParser &Parser,
  120. bool SwitchToPreprocessorLines = false)
  121. : Parser(Parser), OriginalLines(Parser.CurrentLines) {
  122. if (SwitchToPreprocessorLines)
  123. Parser.CurrentLines = &Parser.PreprocessorDirectives;
  124. else if (!Parser.Line->Tokens.empty())
  125. Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
  126. PreBlockLine = std::move(Parser.Line);
  127. Parser.Line = std::make_unique<UnwrappedLine>();
  128. Parser.Line->Level = PreBlockLine->Level;
  129. Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
  130. }
  131. ~ScopedLineState() {
  132. if (!Parser.Line->Tokens.empty()) {
  133. Parser.addUnwrappedLine();
  134. }
  135. assert(Parser.Line->Tokens.empty());
  136. Parser.Line = std::move(PreBlockLine);
  137. if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
  138. Parser.MustBreakBeforeNextToken = true;
  139. Parser.CurrentLines = OriginalLines;
  140. }
  141. private:
  142. UnwrappedLineParser &Parser;
  143. std::unique_ptr<UnwrappedLine> PreBlockLine;
  144. SmallVectorImpl<UnwrappedLine> *OriginalLines;
  145. };
  146. class CompoundStatementIndenter {
  147. public:
  148. CompoundStatementIndenter(UnwrappedLineParser *Parser,
  149. const FormatStyle &Style, unsigned &LineLevel)
  150. : CompoundStatementIndenter(Parser, LineLevel,
  151. Style.BraceWrapping.AfterControlStatement,
  152. Style.BraceWrapping.IndentBraces) {}
  153. CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
  154. bool WrapBrace, bool IndentBrace)
  155. : LineLevel(LineLevel), OldLineLevel(LineLevel) {
  156. if (WrapBrace)
  157. Parser->addUnwrappedLine();
  158. if (IndentBrace)
  159. ++LineLevel;
  160. }
  161. ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
  162. private:
  163. unsigned &LineLevel;
  164. unsigned OldLineLevel;
  165. };
  166. namespace {
  167. class IndexedTokenSource : public FormatTokenSource {
  168. public:
  169. IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
  170. : Tokens(Tokens), Position(-1) {}
  171. FormatToken *getNextToken() override {
  172. ++Position;
  173. return Tokens[Position];
  174. }
  175. unsigned getPosition() override {
  176. assert(Position >= 0);
  177. return Position;
  178. }
  179. FormatToken *setPosition(unsigned P) override {
  180. Position = P;
  181. return Tokens[Position];
  182. }
  183. void reset() { Position = -1; }
  184. private:
  185. ArrayRef<FormatToken *> Tokens;
  186. int Position;
  187. };
  188. } // end anonymous namespace
  189. UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
  190. const AdditionalKeywords &Keywords,
  191. unsigned FirstStartColumn,
  192. ArrayRef<FormatToken *> Tokens,
  193. UnwrappedLineConsumer &Callback)
  194. : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
  195. CurrentLines(&Lines), Style(Style), Keywords(Keywords),
  196. CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
  197. Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
  198. IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
  199. ? IG_Rejected
  200. : IG_Inited),
  201. IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
  202. void UnwrappedLineParser::reset() {
  203. PPBranchLevel = -1;
  204. IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
  205. ? IG_Rejected
  206. : IG_Inited;
  207. IncludeGuardToken = nullptr;
  208. Line.reset(new UnwrappedLine);
  209. CommentsBeforeNextToken.clear();
  210. FormatTok = nullptr;
  211. MustBreakBeforeNextToken = false;
  212. PreprocessorDirectives.clear();
  213. CurrentLines = &Lines;
  214. DeclarationScopeStack.clear();
  215. PPStack.clear();
  216. Line->FirstStartColumn = FirstStartColumn;
  217. }
  218. void UnwrappedLineParser::parse() {
  219. IndexedTokenSource TokenSource(AllTokens);
  220. Line->FirstStartColumn = FirstStartColumn;
  221. do {
  222. LLVM_DEBUG(llvm::dbgs() << "----\n");
  223. reset();
  224. Tokens = &TokenSource;
  225. TokenSource.reset();
  226. readToken();
  227. parseFile();
  228. // If we found an include guard then all preprocessor directives (other than
  229. // the guard) are over-indented by one.
  230. if (IncludeGuard == IG_Found)
  231. for (auto &Line : Lines)
  232. if (Line.InPPDirective && Line.Level > 0)
  233. --Line.Level;
  234. // Create line with eof token.
  235. pushToken(FormatTok);
  236. addUnwrappedLine();
  237. for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
  238. E = Lines.end();
  239. I != E; ++I) {
  240. Callback.consumeUnwrappedLine(*I);
  241. }
  242. Callback.finishRun();
  243. Lines.clear();
  244. while (!PPLevelBranchIndex.empty() &&
  245. PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
  246. PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
  247. PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
  248. }
  249. if (!PPLevelBranchIndex.empty()) {
  250. ++PPLevelBranchIndex.back();
  251. assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
  252. assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
  253. }
  254. } while (!PPLevelBranchIndex.empty());
  255. }
  256. void UnwrappedLineParser::parseFile() {
  257. // The top-level context in a file always has declarations, except for pre-
  258. // processor directives and JavaScript files.
  259. bool MustBeDeclaration =
  260. !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
  261. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  262. MustBeDeclaration);
  263. if (Style.Language == FormatStyle::LK_TextProto)
  264. parseBracedList();
  265. else
  266. parseLevel(/*HasOpeningBrace=*/false);
  267. // Make sure to format the remaining tokens.
  268. //
  269. // LK_TextProto is special since its top-level is parsed as the body of a
  270. // braced list, which does not necessarily have natural line separators such
  271. // as a semicolon. Comments after the last entry that have been determined to
  272. // not belong to that line, as in:
  273. // key: value
  274. // // endfile comment
  275. // do not have a chance to be put on a line of their own until this point.
  276. // Here we add this newline before end-of-file comments.
  277. if (Style.Language == FormatStyle::LK_TextProto &&
  278. !CommentsBeforeNextToken.empty())
  279. addUnwrappedLine();
  280. flushComments(true);
  281. addUnwrappedLine();
  282. }
  283. void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  284. bool SwitchLabelEncountered = false;
  285. do {
  286. tok::TokenKind kind = FormatTok->Tok.getKind();
  287. if (FormatTok->Type == TT_MacroBlockBegin) {
  288. kind = tok::l_brace;
  289. } else if (FormatTok->Type == TT_MacroBlockEnd) {
  290. kind = tok::r_brace;
  291. }
  292. switch (kind) {
  293. case tok::comment:
  294. nextToken();
  295. addUnwrappedLine();
  296. break;
  297. case tok::l_brace:
  298. // FIXME: Add parameter whether this can happen - if this happens, we must
  299. // be in a non-declaration context.
  300. if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
  301. continue;
  302. parseBlock(/*MustBeDeclaration=*/false);
  303. addUnwrappedLine();
  304. break;
  305. case tok::r_brace:
  306. if (HasOpeningBrace)
  307. return;
  308. nextToken();
  309. addUnwrappedLine();
  310. break;
  311. case tok::kw_default: {
  312. unsigned StoredPosition = Tokens->getPosition();
  313. FormatToken *Next;
  314. do {
  315. Next = Tokens->getNextToken();
  316. } while (Next && Next->is(tok::comment));
  317. FormatTok = Tokens->setPosition(StoredPosition);
  318. if (Next && Next->isNot(tok::colon)) {
  319. // default not followed by ':' is not a case label; treat it like
  320. // an identifier.
  321. parseStructuralElement();
  322. break;
  323. }
  324. // Else, if it is 'default:', fall through to the case handling.
  325. LLVM_FALLTHROUGH;
  326. }
  327. case tok::kw_case:
  328. if (Style.Language == FormatStyle::LK_JavaScript &&
  329. Line->MustBeDeclaration) {
  330. // A 'case: string' style field declaration.
  331. parseStructuralElement();
  332. break;
  333. }
  334. if (!SwitchLabelEncountered &&
  335. (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
  336. ++Line->Level;
  337. SwitchLabelEncountered = true;
  338. parseStructuralElement();
  339. break;
  340. default:
  341. parseStructuralElement();
  342. break;
  343. }
  344. } while (!eof());
  345. }
  346. void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  347. // We'll parse forward through the tokens until we hit
  348. // a closing brace or eof - note that getNextToken() will
  349. // parse macros, so this will magically work inside macro
  350. // definitions, too.
  351. unsigned StoredPosition = Tokens->getPosition();
  352. FormatToken *Tok = FormatTok;
  353. const FormatToken *PrevTok = Tok->Previous;
  354. // Keep a stack of positions of lbrace tokens. We will
  355. // update information about whether an lbrace starts a
  356. // braced init list or a different block during the loop.
  357. SmallVector<FormatToken *, 8> LBraceStack;
  358. assert(Tok->Tok.is(tok::l_brace));
  359. do {
  360. // Get next non-comment token.
  361. FormatToken *NextTok;
  362. unsigned ReadTokens = 0;
  363. do {
  364. NextTok = Tokens->getNextToken();
  365. ++ReadTokens;
  366. } while (NextTok->is(tok::comment));
  367. switch (Tok->Tok.getKind()) {
  368. case tok::l_brace:
  369. if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
  370. if (PrevTok->isOneOf(tok::colon, tok::less))
  371. // A ':' indicates this code is in a type, or a braced list
  372. // following a label in an object literal ({a: {b: 1}}).
  373. // A '<' could be an object used in a comparison, but that is nonsense
  374. // code (can never return true), so more likely it is a generic type
  375. // argument (`X<{a: string; b: number}>`).
  376. // The code below could be confused by semicolons between the
  377. // individual members in a type member list, which would normally
  378. // trigger BK_Block. In both cases, this must be parsed as an inline
  379. // braced init.
  380. Tok->BlockKind = BK_BracedInit;
  381. else if (PrevTok->is(tok::r_paren))
  382. // `) { }` can only occur in function or method declarations in JS.
  383. Tok->BlockKind = BK_Block;
  384. } else {
  385. Tok->BlockKind = BK_Unknown;
  386. }
  387. LBraceStack.push_back(Tok);
  388. break;
  389. case tok::r_brace:
  390. if (LBraceStack.empty())
  391. break;
  392. if (LBraceStack.back()->BlockKind == BK_Unknown) {
  393. bool ProbablyBracedList = false;
  394. if (Style.Language == FormatStyle::LK_Proto) {
  395. ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
  396. } else {
  397. // Using OriginalColumn to distinguish between ObjC methods and
  398. // binary operators is a bit hacky.
  399. bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
  400. NextTok->OriginalColumn == 0;
  401. // If there is a comma, semicolon or right paren after the closing
  402. // brace, we assume this is a braced initializer list. Note that
  403. // regardless how we mark inner braces here, we will overwrite the
  404. // BlockKind later if we parse a braced list (where all blocks
  405. // inside are by default braced lists), or when we explicitly detect
  406. // blocks (for example while parsing lambdas).
  407. // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
  408. // braced list in JS.
  409. ProbablyBracedList =
  410. (Style.Language == FormatStyle::LK_JavaScript &&
  411. NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
  412. Keywords.kw_as)) ||
  413. (Style.isCpp() && NextTok->is(tok::l_paren)) ||
  414. NextTok->isOneOf(tok::comma, tok::period, tok::colon,
  415. tok::r_paren, tok::r_square, tok::l_brace,
  416. tok::ellipsis) ||
  417. (NextTok->is(tok::identifier) &&
  418. !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
  419. (NextTok->is(tok::semi) &&
  420. (!ExpectClassBody || LBraceStack.size() != 1)) ||
  421. (NextTok->isBinaryOperator() && !NextIsObjCMethod);
  422. if (NextTok->is(tok::l_square)) {
  423. // We can have an array subscript after a braced init
  424. // list, but C++11 attributes are expected after blocks.
  425. NextTok = Tokens->getNextToken();
  426. ++ReadTokens;
  427. ProbablyBracedList = NextTok->isNot(tok::l_square);
  428. }
  429. }
  430. if (ProbablyBracedList) {
  431. Tok->BlockKind = BK_BracedInit;
  432. LBraceStack.back()->BlockKind = BK_BracedInit;
  433. } else {
  434. Tok->BlockKind = BK_Block;
  435. LBraceStack.back()->BlockKind = BK_Block;
  436. }
  437. }
  438. LBraceStack.pop_back();
  439. break;
  440. case tok::identifier:
  441. if (!Tok->is(TT_StatementMacro))
  442. break;
  443. LLVM_FALLTHROUGH;
  444. case tok::at:
  445. case tok::semi:
  446. case tok::kw_if:
  447. case tok::kw_while:
  448. case tok::kw_for:
  449. case tok::kw_switch:
  450. case tok::kw_try:
  451. case tok::kw___try:
  452. if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
  453. LBraceStack.back()->BlockKind = BK_Block;
  454. break;
  455. default:
  456. break;
  457. }
  458. PrevTok = Tok;
  459. Tok = NextTok;
  460. } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
  461. // Assume other blocks for all unclosed opening braces.
  462. for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
  463. if (LBraceStack[i]->BlockKind == BK_Unknown)
  464. LBraceStack[i]->BlockKind = BK_Block;
  465. }
  466. FormatTok = Tokens->setPosition(StoredPosition);
  467. }
  /// Folds the \c std::hash of \p v into the running hash \p seed.
  ///
  /// The value mixed in is hash(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2),
  /// computed from the old seed and then XOR-ed into it.
  template <class T>
  static inline void hash_combine(std::size_t &seed, const T &v) {
    const std::size_t Mixed =
        std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    seed ^= Mixed;
  }
  473. size_t UnwrappedLineParser::computePPHash() const {
  474. size_t h = 0;
  475. for (const auto &i : PPStack) {
  476. hash_combine(h, size_t(i.Kind));
  477. hash_combine(h, i.Line);
  478. }
  479. return h;
  480. }
  481. void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
  482. bool MunchSemi) {
  483. assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
  484. "'{' or macro block token expected");
  485. const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  486. FormatTok->BlockKind = BK_Block;
  487. size_t PPStartHash = computePPHash();
  488. unsigned InitialLevel = Line->Level;
  489. nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
  490. if (MacroBlock && FormatTok->is(tok::l_paren))
  491. parseParens();
  492. size_t NbPreprocessorDirectives =
  493. CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  494. addUnwrappedLine();
  495. size_t OpeningLineIndex =
  496. CurrentLines->empty()
  497. ? (UnwrappedLine::kInvalidIndex)
  498. : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
  499. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  500. MustBeDeclaration);
  501. if (AddLevel)
  502. ++Line->Level;
  503. parseLevel(/*HasOpeningBrace=*/true);
  504. if (eof())
  505. return;
  506. if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
  507. : !FormatTok->is(tok::r_brace)) {
  508. Line->Level = InitialLevel;
  509. FormatTok->BlockKind = BK_Block;
  510. return;
  511. }
  512. size_t PPEndHash = computePPHash();
  513. // Munch the closing brace.
  514. nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
  515. if (MacroBlock && FormatTok->is(tok::l_paren))
  516. parseParens();
  517. if (MunchSemi && FormatTok->Tok.is(tok::semi))
  518. nextToken();
  519. Line->Level = InitialLevel;
  520. if (PPStartHash == PPEndHash) {
  521. Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
  522. if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
  523. // Update the opening line to add the forward reference as well
  524. (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
  525. CurrentLines->size() - 1;
  526. }
  527. }
  528. }
  529. static bool isGoogScope(const UnwrappedLine &Line) {
  530. // FIXME: Closure-library specific stuff should not be hard-coded but be
  531. // configurable.
  532. if (Line.Tokens.size() < 4)
  533. return false;
  534. auto I = Line.Tokens.begin();
  535. if (I->Tok->TokenText != "goog")
  536. return false;
  537. ++I;
  538. if (I->Tok->isNot(tok::period))
  539. return false;
  540. ++I;
  541. if (I->Tok->TokenText != "scope")
  542. return false;
  543. ++I;
  544. return I->Tok->is(tok::l_paren);
  545. }
  546. static bool isIIFE(const UnwrappedLine &Line,
  547. const AdditionalKeywords &Keywords) {
  548. // Look for the start of an immediately invoked anonymous function.
  549. // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  550. // This is commonly done in JavaScript to create a new, anonymous scope.
  551. // Example: (function() { ... })()
  552. if (Line.Tokens.size() < 3)
  553. return false;
  554. auto I = Line.Tokens.begin();
  555. if (I->Tok->isNot(tok::l_paren))
  556. return false;
  557. ++I;
  558. if (I->Tok->isNot(Keywords.kw_function))
  559. return false;
  560. ++I;
  561. return I->Tok->is(tok::l_paren);
  562. }
  563. static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
  564. const FormatToken &InitialToken) {
  565. if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
  566. return Style.BraceWrapping.AfterNamespace;
  567. if (InitialToken.is(tok::kw_class))
  568. return Style.BraceWrapping.AfterClass;
  569. if (InitialToken.is(tok::kw_union))
  570. return Style.BraceWrapping.AfterUnion;
  571. if (InitialToken.is(tok::kw_struct))
  572. return Style.BraceWrapping.AfterStruct;
  573. return false;
  574. }
  575. void UnwrappedLineParser::parseChildBlock() {
  576. FormatTok->BlockKind = BK_Block;
  577. nextToken();
  578. {
  579. bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
  580. (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
  581. ScopedLineState LineState(*this);
  582. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  583. /*MustBeDeclaration=*/false);
  584. Line->Level += SkipIndent ? 0 : 1;
  585. parseLevel(/*HasOpeningBrace=*/true);
  586. flushComments(isOnNewLine(*FormatTok));
  587. Line->Level -= SkipIndent ? 0 : 1;
  588. }
  589. nextToken();
  590. }
  591. void UnwrappedLineParser::parsePPDirective() {
  592. assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
  593. ScopedMacroState MacroState(*Line, Tokens, FormatTok);
  594. nextToken();
  595. if (!FormatTok->Tok.getIdentifierInfo()) {
  596. parsePPUnknown();
  597. return;
  598. }
  599. switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  600. case tok::pp_define:
  601. parsePPDefine();
  602. return;
  603. case tok::pp_if:
  604. parsePPIf(/*IfDef=*/false);
  605. break;
  606. case tok::pp_ifdef:
  607. case tok::pp_ifndef:
  608. parsePPIf(/*IfDef=*/true);
  609. break;
  610. case tok::pp_else:
  611. parsePPElse();
  612. break;
  613. case tok::pp_elif:
  614. parsePPElIf();
  615. break;
  616. case tok::pp_endif:
  617. parsePPEndIf();
  618. break;
  619. default:
  620. parsePPUnknown();
  621. break;
  622. }
  623. }
  624. void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  625. size_t Line = CurrentLines->size();
  626. if (CurrentLines == &PreprocessorDirectives)
  627. Line += Lines.size();
  628. if (Unreachable ||
  629. (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
  630. PPStack.push_back({PP_Unreachable, Line});
  631. else
  632. PPStack.push_back({PP_Conditional, Line});
  633. }
  634. void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  635. ++PPBranchLevel;
  636. assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  637. if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
  638. PPLevelBranchIndex.push_back(0);
  639. PPLevelBranchCount.push_back(0);
  640. }
  641. PPChainBranchIndex.push(0);
  642. bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  643. conditionalCompilationCondition(Unreachable || Skip);
  644. }
  645. void UnwrappedLineParser::conditionalCompilationAlternative() {
  646. if (!PPStack.empty())
  647. PPStack.pop_back();
  648. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  649. if (!PPChainBranchIndex.empty())
  650. ++PPChainBranchIndex.top();
  651. conditionalCompilationCondition(
  652. PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
  653. PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
  654. }
  655. void UnwrappedLineParser::conditionalCompilationEnd() {
  656. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  657. if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
  658. if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
  659. PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  660. }
  661. }
  662. // Guard against #endif's without #if.
  663. if (PPBranchLevel > -1)
  664. --PPBranchLevel;
  665. if (!PPChainBranchIndex.empty())
  666. PPChainBranchIndex.pop();
  667. if (!PPStack.empty())
  668. PPStack.pop_back();
  669. }
  670. void UnwrappedLineParser::parsePPIf(bool IfDef) {
  671. bool IfNDef = FormatTok->is(tok::pp_ifndef);
  672. nextToken();
  673. bool Unreachable = false;
  674. if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
  675. Unreachable = true;
  676. if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
  677. Unreachable = true;
  678. conditionalCompilationStart(Unreachable);
  679. FormatToken *IfCondition = FormatTok;
  680. // If there's a #ifndef on the first line, and the only lines before it are
  681. // comments, it could be an include guard.
  682. bool MaybeIncludeGuard = IfNDef;
  683. if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
  684. for (auto &Line : Lines) {
  685. if (!Line.Tokens.front().Tok->is(tok::comment)) {
  686. MaybeIncludeGuard = false;
  687. IncludeGuard = IG_Rejected;
  688. break;
  689. }
  690. }
  691. --PPBranchLevel;
  692. parsePPUnknown();
  693. ++PPBranchLevel;
  694. if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
  695. IncludeGuard = IG_IfNdefed;
  696. IncludeGuardToken = IfCondition;
  697. }
  698. }
  699. void UnwrappedLineParser::parsePPElse() {
  700. // If a potential include guard has an #else, it's not an include guard.
  701. if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
  702. IncludeGuard = IG_Rejected;
  703. conditionalCompilationAlternative();
  704. if (PPBranchLevel > -1)
  705. --PPBranchLevel;
  706. parsePPUnknown();
  707. ++PPBranchLevel;
  708. }
  709. void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
  710. void UnwrappedLineParser::parsePPEndIf() {
  711. conditionalCompilationEnd();
  712. parsePPUnknown();
  713. // If the #endif of a potential include guard is the last thing in the file,
  714. // then we found an include guard.
  715. unsigned TokenPosition = Tokens->getPosition();
  716. FormatToken *PeekNext = AllTokens[TokenPosition];
  717. if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
  718. PeekNext->is(tok::eof) &&
  719. Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  720. IncludeGuard = IG_Found;
  721. }
  722. void UnwrappedLineParser::parsePPDefine() {
  723. nextToken();
  724. if (!FormatTok->Tok.getIdentifierInfo()) {
  725. IncludeGuard = IG_Rejected;
  726. IncludeGuardToken = nullptr;
  727. parsePPUnknown();
  728. return;
  729. }
  730. if (IncludeGuard == IG_IfNdefed &&
  731. IncludeGuardToken->TokenText == FormatTok->TokenText) {
  732. IncludeGuard = IG_Defined;
  733. IncludeGuardToken = nullptr;
  734. for (auto &Line : Lines) {
  735. if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
  736. IncludeGuard = IG_Rejected;
  737. break;
  738. }
  739. }
  740. }
  741. nextToken();
  742. if (FormatTok->Tok.getKind() == tok::l_paren &&
  743. FormatTok->WhitespaceRange.getBegin() ==
  744. FormatTok->WhitespaceRange.getEnd()) {
  745. parseParens();
  746. }
  747. if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  748. Line->Level += PPBranchLevel + 1;
  749. addUnwrappedLine();
  750. ++Line->Level;
  751. // Errors during a preprocessor directive can only affect the layout of the
  752. // preprocessor directive, and thus we ignore them. An alternative approach
  753. // would be to use the same approach we use on the file level (no
  754. // re-indentation if there was a structural error) within the macro
  755. // definition.
  756. parseFile();
  757. }
  758. void UnwrappedLineParser::parsePPUnknown() {
  759. do {
  760. nextToken();
  761. } while (!eof());
  762. if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  763. Line->Level += PPBranchLevel + 1;
  764. addUnwrappedLine();
  765. }
  766. // Here we blacklist certain tokens that are not usually the first token in an
  767. // unwrapped line. This is used in attempt to distinguish macro calls without
  768. // trailing semicolons from other constructs split to several lines.
  769. static bool tokenCanStartNewLine(const clang::Token &Tok) {
  770. // Semicolon can be a null-statement, l_square can be a start of a macro or
  771. // a C++11 attribute, but this doesn't seem to be common.
  772. return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
  773. Tok.isNot(tok::l_square) &&
  774. // Tokens that can only be used as binary operators and a part of
  775. // overloaded operator names.
  776. Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
  777. Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
  778. Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
  779. Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
  780. Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
  781. Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
  782. Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
  783. Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
  784. Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
  785. Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
  786. Tok.isNot(tok::lesslessequal) &&
  787. // Colon is used in labels, base class lists, initializer lists,
  788. // range-based for loops, ternary operator, but should never be the
  789. // first token in an unwrapped line.
  790. Tok.isNot(tok::colon) &&
  791. // 'noexcept' is a trailing annotation.
  792. Tok.isNot(tok::kw_noexcept);
  793. }
  794. static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
  795. const FormatToken *FormatTok) {
  796. // FIXME: This returns true for C/C++ keywords like 'struct'.
  797. return FormatTok->is(tok::identifier) &&
  798. (FormatTok->Tok.getIdentifierInfo() == nullptr ||
  799. !FormatTok->isOneOf(
  800. Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
  801. Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
  802. Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
  803. Keywords.kw_let, Keywords.kw_var, tok::kw_const,
  804. Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
  805. Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
  806. Keywords.kw_from));
  807. }
  808. static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
  809. const FormatToken *FormatTok) {
  810. return FormatTok->Tok.isLiteral() ||
  811. FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
  812. mustBeJSIdent(Keywords, FormatTok);
  813. }
  814. // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
  815. // when encountered after a value (see mustBeJSIdentOrValue).
  816. static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
  817. const FormatToken *FormatTok) {
  818. return FormatTok->isOneOf(
  819. tok::kw_return, Keywords.kw_yield,
  820. // conditionals
  821. tok::kw_if, tok::kw_else,
  822. // loops
  823. tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
  824. // switch/case
  825. tok::kw_switch, tok::kw_case,
  826. // exceptions
  827. tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
  828. // declaration
  829. tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
  830. Keywords.kw_async, Keywords.kw_function,
  831. // import/export
  832. Keywords.kw_import, tok::kw_export);
  833. }
  834. // readTokenWithJavaScriptASI reads the next token and terminates the current
  835. // line if JavaScript Automatic Semicolon Insertion must
  836. // happen between the current token and the next token.
  837. //
  838. // This method is conservative - it cannot cover all edge cases of JavaScript,
  839. // but only aims to correctly handle certain well known cases. It *must not*
  840. // return true in speculative cases.
  841. void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  842. FormatToken *Previous = FormatTok;
  843. readToken();
  844. FormatToken *Next = FormatTok;
  845. bool IsOnSameLine =
  846. CommentsBeforeNextToken.empty()
  847. ? Next->NewlinesBefore == 0
  848. : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  849. if (IsOnSameLine)
  850. return;
  851. bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  852. bool PreviousStartsTemplateExpr =
  853. Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  854. if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
  855. // If the line contains an '@' sign, the previous token might be an
  856. // annotation, which can precede another identifier/value.
  857. bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
  858. [](UnwrappedLineNode &LineNode) {
  859. return LineNode.Tok->is(tok::at);
  860. }) != Line->Tokens.end();
  861. if (HasAt)
  862. return;
  863. }
  864. if (Next->is(tok::exclaim) && PreviousMustBeValue)
  865. return addUnwrappedLine();
  866. bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  867. bool NextEndsTemplateExpr =
  868. Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  869. if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
  870. (PreviousMustBeValue ||
  871. Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
  872. tok::minusminus)))
  873. return addUnwrappedLine();
  874. if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
  875. isJSDeclOrStmt(Keywords, Next))
  876. return addUnwrappedLine();
  877. }
/// Parses one structural element (statement, declaration, label, ...)
/// starting at the current token.
///
/// Works in two phases: a first switch dispatches on the leading token to the
/// dedicated sub-parsers for constructs that can be recognised from their
/// first token alone; anything that falls out of that switch is handled by a
/// token-by-token loop that runs until a case returns or eof() is reached.
/// Must not be called with the current token being an opening brace.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen include: the keyword, an optional string literal, and the line
  // is complete.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: constructs identified by their first token.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Walk to the matching closing brace, flagging every token in between
      // as Finalized (NOTE(review): presumably so that the contents of the
      // asm block are left untouched by later stages - confirm).
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are plain modifiers; otherwise they
    // are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {' opens a linkage block; its contents are only indented
    // when BraceWrapping.AfterExternBlock is set.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' ['public'] "file" [';'].
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:' / 'slots:' style section labels (and their Q_ variants).
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume tokens one construct at a time until something ends the
  // structural element (a case returns) or the token stream is exhausted.
  do {
    // Token preceding the current one; captured before any case advances.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // '@{' is parsed as a braced list.
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        // Java '@interface'.
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }
      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef NS_ENUM(...)' and friends are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is left for the caller; only the pending line is
      // flushed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol so it is not treated as an expression
      // operator by the cases below.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' followed by an optional type/identifier, optional parameter list
      // and optional braced body (NOTE(review): looks like an Objective-C
      // block literal - confirm).
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }
      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }
      // See if the following token should start a new unwrapped line.
      // The identifier's text is saved before advancing; it is needed for the
      // macro heuristic below.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;
      // Only an identifier that is the first token of the line (optionally
      // preceded by a single comment) can be a label or a stand-alone macro.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();
        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
        // Heuristic: the name is all upper-case, is either function-like or at
        // least five characters long, is followed by a line break, and the
        // next token may plausibly start a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      nextToken();
      // '= {' starts a braced list; in Proto, '= <' does as well, closed by
      // '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
  1308. bool UnwrappedLineParser::tryToParseLambda() {
  1309. if (!Style.isCpp()) {
  1310. nextToken();
  1311. return false;
  1312. }
  1313. assert(FormatTok->is(tok::l_square));
  1314. FormatToken &LSquare = *FormatTok;
  1315. if (!tryToParseLambdaIntroducer())
  1316. return false;
  1317. bool SeenArrow = false;
  1318. while (FormatTok->isNot(tok::l_brace)) {
  1319. if (FormatTok->isSimpleTypeSpecifier()) {
  1320. nextToken();
  1321. continue;
  1322. }
  1323. switch (FormatTok->Tok.getKind()) {
  1324. case tok::l_brace:
  1325. break;
  1326. case tok::l_paren:
  1327. parseParens();
  1328. break;
  1329. case tok::amp:
  1330. case tok::star:
  1331. case tok::kw_const:
  1332. case tok::comma:
  1333. case tok::less:
  1334. case tok::greater:
  1335. case tok::identifier:
  1336. case tok::numeric_constant:
  1337. case tok::coloncolon:
  1338. case tok::kw_class:
  1339. case tok::kw_mutable:
  1340. case tok::kw_noexcept:
  1341. case tok::kw_template:
  1342. case tok::kw_typename:
  1343. nextToken();
  1344. break;
  1345. // Specialization of a template with an integer parameter can contain
  1346. // arithmetic, logical, comparison and ternary operators.
  1347. //
  1348. // FIXME: This also accepts sequences of operators that are not in the scope
  1349. // of a template argument list.
  1350. //
  1351. // In a C++ lambda a template type can only occur after an arrow. We use
  1352. // this as an heuristic to distinguish between Objective-C expressions
  1353. // followed by an `a->b` expression, such as:
  1354. // ([obj func:arg] + a->b)
  1355. // Otherwise the code below would parse as a lambda.
  1356. //
  1357. // FIXME: This heuristic is incorrect for C++20 generic lambdas with
  1358. // explicit template lists: []<bool b = true && false>(U &&u){}
  1359. case tok::plus:
  1360. case tok::minus:
  1361. case tok::exclaim:
  1362. case tok::tilde:
  1363. case tok::slash:
  1364. case tok::percent:
  1365. case tok::lessless:
  1366. case tok::pipe:
  1367. case tok::pipepipe:
  1368. case tok::ampamp:
  1369. case tok::caret:
  1370. case tok::equalequal:
  1371. case tok::exclaimequal:
  1372. case tok::greaterequal:
  1373. case tok::lessequal:
  1374. case tok::question:
  1375. case tok::colon:
  1376. case tok::kw_true:
  1377. case tok::kw_false:
  1378. if (SeenArrow) {
  1379. nextToken();
  1380. break;
  1381. }
  1382. return true;
  1383. case tok::arrow:
  1384. // This might or might not actually be a lambda arrow (this could be an
  1385. // ObjC method invocation followed by a dereferencing arrow). We might
  1386. // reset this back to TT_Unknown in TokenAnnotator.
  1387. FormatTok->Type = TT_LambdaArrow;
  1388. SeenArrow = true;
  1389. nextToken();
  1390. break;
  1391. default:
  1392. return true;
  1393. }
  1394. }
  1395. FormatTok->Type = TT_LambdaLBrace;
  1396. LSquare.Type = TT_LambdaLSquare;
  1397. parseChildBlock();
  1398. return true;
  1399. }
  1400. bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  1401. const FormatToken *Previous = FormatTok->Previous;
  1402. if (Previous &&
  1403. (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
  1404. tok::kw_delete, tok::l_square) ||
  1405. FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
  1406. Previous->isSimpleTypeSpecifier())) {
  1407. nextToken();
  1408. return false;
  1409. }
  1410. nextToken();
  1411. if (FormatTok->is(tok::l_square)) {
  1412. return false;
  1413. }
  1414. parseSquare(/*LambdaIntroducer=*/true);
  1415. return true;
  1416. }
  1417. void UnwrappedLineParser::tryToParseJSFunction() {
  1418. assert(FormatTok->is(Keywords.kw_function) ||
  1419. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
  1420. if (FormatTok->is(Keywords.kw_async))
  1421. nextToken();
  1422. // Consume "function".
  1423. nextToken();
  1424. // Consume * (generator function). Treat it like C++'s overloaded operators.
  1425. if (FormatTok->is(tok::star)) {
  1426. FormatTok->Type = TT_OverloadedOperator;
  1427. nextToken();
  1428. }
  1429. // Consume function name.
  1430. if (FormatTok->is(tok::identifier))
  1431. nextToken();
  1432. if (FormatTok->isNot(tok::l_paren))
  1433. return;
  1434. // Parse formal parameter list.
  1435. parseParens();
  1436. if (FormatTok->is(tok::colon)) {
  1437. // Parse a type definition.
  1438. nextToken();
  1439. // Eat the type declaration. For braced inline object types, balance braces,
  1440. // otherwise just parse until finding an l_brace for the function body.
  1441. if (FormatTok->is(tok::l_brace))
  1442. tryToParseBracedList();
  1443. else
  1444. while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
  1445. nextToken();
  1446. }
  1447. if (FormatTok->is(tok::semi))
  1448. return;
  1449. parseChildBlock();
  1450. }
  1451. bool UnwrappedLineParser::tryToParseBracedList() {
  1452. if (FormatTok->BlockKind == BK_Unknown)
  1453. calculateBraceTypes();
  1454. assert(FormatTok->BlockKind != BK_Unknown);
  1455. if (FormatTok->BlockKind == BK_Block)
  1456. return false;
  1457. nextToken();
  1458. parseBracedList();
  1459. return true;
  1460. }
  1461. bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
  1462. tok::TokenKind ClosingBraceKind) {
  1463. bool HasError = false;
  1464. // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  1465. // replace this by using parseAssigmentExpression() inside.
  1466. do {
  1467. if (Style.Language == FormatStyle::LK_JavaScript) {
  1468. if (FormatTok->is(Keywords.kw_function) ||
  1469. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
  1470. tryToParseJSFunction();
  1471. continue;
  1472. }
  1473. if (FormatTok->is(TT_JsFatArrow)) {
  1474. nextToken();
  1475. // Fat arrows can be followed by simple expressions or by child blocks
  1476. // in curly braces.
  1477. if (FormatTok->is(tok::l_brace)) {
  1478. parseChildBlock();
  1479. continue;
  1480. }
  1481. }
  1482. if (FormatTok->is(tok::l_brace)) {
  1483. // Could be a method inside of a braced list `{a() { return 1; }}`.
  1484. if (tryToParseBracedList())
  1485. continue;
  1486. parseChildBlock();
  1487. }
  1488. }
  1489. if (FormatTok->Tok.getKind() == ClosingBraceKind) {
  1490. nextToken();
  1491. return !HasError;
  1492. }
  1493. switch (FormatTok->Tok.getKind()) {
  1494. case tok::caret:
  1495. nextToken();
  1496. if (FormatTok->is(tok::l_brace)) {
  1497. parseChildBlock();
  1498. }
  1499. break;
  1500. case tok::l_square:
  1501. tryToParseLambda();
  1502. break;
  1503. case tok::l_paren:
  1504. parseParens();
  1505. // JavaScript can just have free standing methods and getters/setters in
  1506. // object literals. Detect them by a "{" following ")".
  1507. if (Style.Language == FormatStyle::LK_JavaScript) {
  1508. if (FormatTok->is(tok::l_brace))
  1509. parseChildBlock();
  1510. break;
  1511. }
  1512. break;
  1513. case tok::l_brace:
  1514. // Assume there are no blocks inside a braced init list apart
  1515. // from the ones we explicitly parse out (like lambdas).
  1516. FormatTok->BlockKind = BK_BracedInit;
  1517. nextToken();
  1518. parseBracedList();
  1519. break;
  1520. case tok::less:
  1521. if (Style.Language == FormatStyle::LK_Proto) {
  1522. nextToken();
  1523. parseBracedList(/*ContinueOnSemicolons=*/false,
  1524. /*ClosingBraceKind=*/tok::greater);
  1525. } else {
  1526. nextToken();
  1527. }
  1528. break;
  1529. case tok::semi:
  1530. // JavaScript (or more precisely TypeScript) can have semicolons in braced
  1531. // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
  1532. // used for error recovery if we have otherwise determined that this is
  1533. // a braced list.
  1534. if (Style.Language == FormatStyle::LK_JavaScript) {
  1535. nextToken();
  1536. break;
  1537. }
  1538. HasError = true;
  1539. if (!ContinueOnSemicolons)
  1540. return !HasError;
  1541. nextToken();
  1542. break;
  1543. case tok::comma:
  1544. nextToken();
  1545. break;
  1546. default:
  1547. nextToken();
  1548. break;
  1549. }
  1550. } while (!eof());
  1551. return false;
  1552. }
  1553. void UnwrappedLineParser::parseParens() {
  1554. assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  1555. nextToken();
  1556. do {
  1557. switch (FormatTok->Tok.getKind()) {
  1558. case tok::l_paren:
  1559. parseParens();
  1560. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
  1561. parseChildBlock();
  1562. break;
  1563. case tok::r_paren:
  1564. nextToken();
  1565. return;
  1566. case tok::r_brace:
  1567. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  1568. return;
  1569. case tok::l_square:
  1570. tryToParseLambda();
  1571. break;
  1572. case tok::l_brace:
  1573. if (!tryToParseBracedList())
  1574. parseChildBlock();
  1575. break;
  1576. case tok::at:
  1577. nextToken();
  1578. if (FormatTok->Tok.is(tok::l_brace)) {
  1579. nextToken();
  1580. parseBracedList();
  1581. }
  1582. break;
  1583. case tok::kw_class:
  1584. if (Style.Language == FormatStyle::LK_JavaScript)
  1585. parseRecord(/*ParseAsExpr=*/true);
  1586. else
  1587. nextToken();
  1588. break;
  1589. case tok::identifier:
  1590. if (Style.Language == FormatStyle::LK_JavaScript &&
  1591. (FormatTok->is(Keywords.kw_function) ||
  1592. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
  1593. tryToParseJSFunction();
  1594. else
  1595. nextToken();
  1596. break;
  1597. default:
  1598. nextToken();
  1599. break;
  1600. }
  1601. } while (!eof());
  1602. }
  1603. void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  1604. if (!LambdaIntroducer) {
  1605. assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
  1606. if (tryToParseLambda())
  1607. return;
  1608. }
  1609. do {
  1610. switch (FormatTok->Tok.getKind()) {
  1611. case tok::l_paren:
  1612. parseParens();
  1613. break;
  1614. case tok::r_square:
  1615. nextToken();
  1616. return;
  1617. case tok::r_brace:
  1618. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  1619. return;
  1620. case tok::l_square:
  1621. parseSquare();
  1622. break;
  1623. case tok::l_brace: {
  1624. if (!tryToParseBracedList())
  1625. parseChildBlock();
  1626. break;
  1627. }
  1628. case tok::at:
  1629. nextToken();
  1630. if (FormatTok->Tok.is(tok::l_brace)) {
  1631. nextToken();
  1632. parseBracedList();
  1633. }
  1634. break;
  1635. default:
  1636. nextToken();
  1637. break;
  1638. }
  1639. } while (!eof());
  1640. }
  1641. void UnwrappedLineParser::parseIfThenElse() {
  1642. assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  1643. nextToken();
  1644. if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
  1645. nextToken();
  1646. if (FormatTok->Tok.is(tok::l_paren))
  1647. parseParens();
  1648. bool NeedsUnwrappedLine = false;
  1649. if (FormatTok->Tok.is(tok::l_brace)) {
  1650. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1651. parseBlock(/*MustBeDeclaration=*/false);
  1652. if (Style.BraceWrapping.BeforeElse)
  1653. addUnwrappedLine();
  1654. else
  1655. NeedsUnwrappedLine = true;
  1656. } else {
  1657. addUnwrappedLine();
  1658. ++Line->Level;
  1659. parseStructuralElement();
  1660. --Line->Level;
  1661. }
  1662. if (FormatTok->Tok.is(tok::kw_else)) {
  1663. nextToken();
  1664. if (FormatTok->Tok.is(tok::l_brace)) {
  1665. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1666. parseBlock(/*MustBeDeclaration=*/false);
  1667. addUnwrappedLine();
  1668. } else if (FormatTok->Tok.is(tok::kw_if)) {
  1669. parseIfThenElse();
  1670. } else {
  1671. addUnwrappedLine();
  1672. ++Line->Level;
  1673. parseStructuralElement();
  1674. if (FormatTok->is(tok::eof))
  1675. addUnwrappedLine();
  1676. --Line->Level;
  1677. }
  1678. } else if (NeedsUnwrappedLine) {
  1679. addUnwrappedLine();
  1680. }
  1681. }
  1682. void UnwrappedLineParser::parseTryCatch() {
  1683. assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  1684. nextToken();
  1685. bool NeedsUnwrappedLine = false;
  1686. if (FormatTok->is(tok::colon)) {
  1687. // We are in a function try block, what comes is an initializer list.
  1688. nextToken();
  1689. while (FormatTok->is(tok::identifier)) {
  1690. nextToken();
  1691. if (FormatTok->is(tok::l_paren))
  1692. parseParens();
  1693. if (FormatTok->is(tok::comma))
  1694. nextToken();
  1695. }
  1696. }
  1697. // Parse try with resource.
  1698. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
  1699. parseParens();
  1700. }
  1701. if (FormatTok->is(tok::l_brace)) {
  1702. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1703. parseBlock(/*MustBeDeclaration=*/false);
  1704. if (Style.BraceWrapping.BeforeCatch) {
  1705. addUnwrappedLine();
  1706. } else {
  1707. NeedsUnwrappedLine = true;
  1708. }
  1709. } else if (!FormatTok->is(tok::kw_catch)) {
  1710. // The C++ standard requires a compound-statement after a try.
  1711. // If there's none, we try to assume there's a structuralElement
  1712. // and try to continue.
  1713. addUnwrappedLine();
  1714. ++Line->Level;
  1715. parseStructuralElement();
  1716. --Line->Level;
  1717. }
  1718. while (1) {
  1719. if (FormatTok->is(tok::at))
  1720. nextToken();
  1721. if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
  1722. tok::kw___finally) ||
  1723. ((Style.Language == FormatStyle::LK_Java ||
  1724. Style.Language == FormatStyle::LK_JavaScript) &&
  1725. FormatTok->is(Keywords.kw_finally)) ||
  1726. (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
  1727. FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
  1728. break;
  1729. nextToken();
  1730. while (FormatTok->isNot(tok::l_brace)) {
  1731. if (FormatTok->is(tok::l_paren)) {
  1732. parseParens();
  1733. continue;
  1734. }
  1735. if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
  1736. return;
  1737. nextToken();
  1738. }
  1739. NeedsUnwrappedLine = false;
  1740. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1741. parseBlock(/*MustBeDeclaration=*/false);
  1742. if (Style.BraceWrapping.BeforeCatch)
  1743. addUnwrappedLine();
  1744. else
  1745. NeedsUnwrappedLine = true;
  1746. }
  1747. if (NeedsUnwrappedLine)
  1748. addUnwrappedLine();
  1749. }
  1750. void UnwrappedLineParser::parseNamespace() {
  1751. assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
  1752. "'namespace' expected");
  1753. const FormatToken &InitialToken = *FormatTok;
  1754. nextToken();
  1755. if (InitialToken.is(TT_NamespaceMacro)) {
  1756. parseParens();
  1757. } else {
  1758. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
  1759. tok::l_square)) {
  1760. if (FormatTok->is(tok::l_square))
  1761. parseSquare();
  1762. else
  1763. nextToken();
  1764. }
  1765. }
  1766. if (FormatTok->Tok.is(tok::l_brace)) {
  1767. if (ShouldBreakBeforeBrace(Style, InitialToken))
  1768. addUnwrappedLine();
  1769. bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
  1770. (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
  1771. DeclarationScopeStack.size() > 1);
  1772. parseBlock(/*MustBeDeclaration=*/true, AddLevel);
  1773. // Munch the semicolon after a namespace. This is more common than one would
  1774. // think. Puttin the semicolon into its own line is very ugly.
  1775. if (FormatTok->Tok.is(tok::semi))
  1776. nextToken();
  1777. addUnwrappedLine();
  1778. }
  1779. // FIXME: Add error handling.
  1780. }
  1781. void UnwrappedLineParser::parseNew() {
  1782. assert(FormatTok->is(tok::kw_new) && "'new' expected");
  1783. nextToken();
  1784. if (Style.Language != FormatStyle::LK_Java)
  1785. return;
  1786. // In Java, we can parse everything up to the parens, which aren't optional.
  1787. do {
  1788. // There should not be a ;, { or } before the new's open paren.
  1789. if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
  1790. return;
  1791. // Consume the parens.
  1792. if (FormatTok->is(tok::l_paren)) {
  1793. parseParens();
  1794. // If there is a class body of an anonymous class, consume that as child.
  1795. if (FormatTok->is(tok::l_brace))
  1796. parseChildBlock();
  1797. return;
  1798. }
  1799. nextToken();
  1800. } while (!eof());
  1801. }
  1802. void UnwrappedLineParser::parseForOrWhileLoop() {
  1803. assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
  1804. "'for', 'while' or foreach macro expected");
  1805. nextToken();
  1806. // JS' for await ( ...
  1807. if (Style.Language == FormatStyle::LK_JavaScript &&
  1808. FormatTok->is(Keywords.kw_await))
  1809. nextToken();
  1810. if (FormatTok->Tok.is(tok::l_paren))
  1811. parseParens();
  1812. if (FormatTok->Tok.is(tok::l_brace)) {
  1813. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1814. parseBlock(/*MustBeDeclaration=*/false);
  1815. addUnwrappedLine();
  1816. } else {
  1817. addUnwrappedLine();
  1818. ++Line->Level;
  1819. parseStructuralElement();
  1820. --Line->Level;
  1821. }
  1822. }
  1823. void UnwrappedLineParser::parseDoWhile() {
  1824. assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
  1825. nextToken();
  1826. if (FormatTok->Tok.is(tok::l_brace)) {
  1827. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1828. parseBlock(/*MustBeDeclaration=*/false);
  1829. if (Style.BraceWrapping.IndentBraces)
  1830. addUnwrappedLine();
  1831. } else {
  1832. addUnwrappedLine();
  1833. ++Line->Level;
  1834. parseStructuralElement();
  1835. --Line->Level;
  1836. }
  1837. // FIXME: Add error handling.
  1838. if (!FormatTok->Tok.is(tok::kw_while)) {
  1839. addUnwrappedLine();
  1840. return;
  1841. }
  1842. nextToken();
  1843. parseStructuralElement();
  1844. }
  1845. void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
  1846. nextToken();
  1847. unsigned OldLineLevel = Line->Level;
  1848. if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
  1849. --Line->Level;
  1850. if (LeftAlignLabel)
  1851. Line->Level = 0;
  1852. if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
  1853. CompoundStatementIndenter Indenter(this, Line->Level,
  1854. Style.BraceWrapping.AfterCaseLabel,
  1855. Style.BraceWrapping.IndentBraces);
  1856. parseBlock(/*MustBeDeclaration=*/false);
  1857. if (FormatTok->Tok.is(tok::kw_break)) {
  1858. if (Style.BraceWrapping.AfterControlStatement ==
  1859. FormatStyle::BWACS_Always)
  1860. addUnwrappedLine();
  1861. parseStructuralElement();
  1862. }
  1863. addUnwrappedLine();
  1864. } else {
  1865. if (FormatTok->is(tok::semi))
  1866. nextToken();
  1867. addUnwrappedLine();
  1868. }
  1869. Line->Level = OldLineLevel;
  1870. if (FormatTok->isNot(tok::l_brace)) {
  1871. parseStructuralElement();
  1872. addUnwrappedLine();
  1873. }
  1874. }
  1875. void UnwrappedLineParser::parseCaseLabel() {
  1876. assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
  1877. // FIXME: fix handling of complex expressions here.
  1878. do {
  1879. nextToken();
  1880. } while (!eof() && !FormatTok->Tok.is(tok::colon));
  1881. parseLabel();
  1882. }
  1883. void UnwrappedLineParser::parseSwitch() {
  1884. assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
  1885. nextToken();
  1886. if (FormatTok->Tok.is(tok::l_paren))
  1887. parseParens();
  1888. if (FormatTok->Tok.is(tok::l_brace)) {
  1889. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1890. parseBlock(/*MustBeDeclaration=*/false);
  1891. addUnwrappedLine();
  1892. } else {
  1893. addUnwrappedLine();
  1894. ++Line->Level;
  1895. parseStructuralElement();
  1896. --Line->Level;
  1897. }
  1898. }
  1899. void UnwrappedLineParser::parseAccessSpecifier() {
  1900. nextToken();
  1901. // Understand Qt's slots.
  1902. if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
  1903. nextToken();
  1904. // Otherwise, we don't know what it is, and we'd better keep the next token.
  1905. if (FormatTok->Tok.is(tok::colon))
  1906. nextToken();
  1907. addUnwrappedLine();
  1908. }
  1909. bool UnwrappedLineParser::parseEnum() {
  1910. // Won't be 'enum' for NS_ENUMs.
  1911. if (FormatTok->Tok.is(tok::kw_enum))
  1912. nextToken();
  1913. // In TypeScript, "enum" can also be used as property name, e.g. in interface
  1914. // declarations. An "enum" keyword followed by a colon would be a syntax
  1915. // error and thus assume it is just an identifier.
  1916. if (Style.Language == FormatStyle::LK_JavaScript &&
  1917. FormatTok->isOneOf(tok::colon, tok::question))
  1918. return false;
  1919. // In protobuf, "enum" can be used as a field name.
  1920. if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
  1921. return false;
  1922. // Eat up enum class ...
  1923. if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
  1924. nextToken();
  1925. while (FormatTok->Tok.getIdentifierInfo() ||
  1926. FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
  1927. tok::greater, tok::comma, tok::question)) {
  1928. nextToken();
  1929. // We can have macros or attributes in between 'enum' and the enum name.
  1930. if (FormatTok->is(tok::l_paren))
  1931. parseParens();
  1932. if (FormatTok->is(tok::identifier)) {
  1933. nextToken();
  1934. // If there are two identifiers in a row, this is likely an elaborate
  1935. // return type. In Java, this can be "implements", etc.
  1936. if (Style.isCpp() && FormatTok->is(tok::identifier))
  1937. return false;
  1938. }
  1939. }
  1940. // Just a declaration or something is wrong.
  1941. if (FormatTok->isNot(tok::l_brace))
  1942. return true;
  1943. FormatTok->BlockKind = BK_Block;
  1944. if (Style.Language == FormatStyle::LK_Java) {
  1945. // Java enums are different.
  1946. parseJavaEnumBody();
  1947. return true;
  1948. }
  1949. if (Style.Language == FormatStyle::LK_Proto) {
  1950. parseBlock(/*MustBeDeclaration=*/true);
  1951. return true;
  1952. }
  1953. // Parse enum body.
  1954. nextToken();
  1955. bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
  1956. if (HasError) {
  1957. if (FormatTok->is(tok::semi))
  1958. nextToken();
  1959. addUnwrappedLine();
  1960. }
  1961. return true;
  1962. // There is no addUnwrappedLine() here so that we fall through to parsing a
  1963. // structural element afterwards. Thus, in "enum A {} n, m;",
  1964. // "} n, m;" will end up in one unwrapped line.
  1965. }
  1966. void UnwrappedLineParser::parseJavaEnumBody() {
  1967. // Determine whether the enum is simple, i.e. does not have a semicolon or
  1968. // constants with class bodies. Simple enums can be formatted like braced
  1969. // lists, contracted to a single line, etc.
  1970. unsigned StoredPosition = Tokens->getPosition();
  1971. bool IsSimple = true;
  1972. FormatToken *Tok = Tokens->getNextToken();
  1973. while (Tok) {
  1974. if (Tok->is(tok::r_brace))
  1975. break;
  1976. if (Tok->isOneOf(tok::l_brace, tok::semi)) {
  1977. IsSimple = false;
  1978. break;
  1979. }
  1980. // FIXME: This will also mark enums with braces in the arguments to enum
  1981. // constants as "not simple". This is probably fine in practice, though.
  1982. Tok = Tokens->getNextToken();
  1983. }
  1984. FormatTok = Tokens->setPosition(StoredPosition);
  1985. if (IsSimple) {
  1986. nextToken();
  1987. parseBracedList();
  1988. addUnwrappedLine();
  1989. return;
  1990. }
  1991. // Parse the body of a more complex enum.
  1992. // First add a line for everything up to the "{".
  1993. nextToken();
  1994. addUnwrappedLine();
  1995. ++Line->Level;
  1996. // Parse the enum constants.
  1997. while (FormatTok) {
  1998. if (FormatTok->is(tok::l_brace)) {
  1999. // Parse the constant's class body.
  2000. parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
  2001. /*MunchSemi=*/false);
  2002. } else if (FormatTok->is(tok::l_paren)) {
  2003. parseParens();
  2004. } else if (FormatTok->is(tok::comma)) {
  2005. nextToken();
  2006. addUnwrappedLine();
  2007. } else if (FormatTok->is(tok::semi)) {
  2008. nextToken();
  2009. addUnwrappedLine();
  2010. break;
  2011. } else if (FormatTok->is(tok::r_brace)) {
  2012. addUnwrappedLine();
  2013. break;
  2014. } else {
  2015. nextToken();
  2016. }
  2017. }
  2018. // Parse the class body after the enum's ";" if any.
  2019. parseLevel(/*HasOpeningBrace=*/true);
  2020. nextToken();
  2021. --Line->Level;
  2022. addUnwrappedLine();
  2023. }
  2024. void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  2025. const FormatToken &InitialToken = *FormatTok;
  2026. nextToken();
  2027. // The actual identifier can be a nested name specifier, and in macros
  2028. // it is often token-pasted.
  2029. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
  2030. tok::kw___attribute, tok::kw___declspec,
  2031. tok::kw_alignas) ||
  2032. ((Style.Language == FormatStyle::LK_Java ||
  2033. Style.Language == FormatStyle::LK_JavaScript) &&
  2034. FormatTok->isOneOf(tok::period, tok::comma))) {
  2035. if (Style.Language == FormatStyle::LK_JavaScript &&
  2036. FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
  2037. // JavaScript/TypeScript supports inline object types in
  2038. // extends/implements positions:
  2039. // class Foo implements {bar: number} { }
  2040. nextToken();
  2041. if (FormatTok->is(tok::l_brace)) {
  2042. tryToParseBracedList();
  2043. continue;
  2044. }
  2045. }
  2046. bool IsNonMacroIdentifier =
  2047. FormatTok->is(tok::identifier) &&
  2048. FormatTok->TokenText != FormatTok->TokenText.upper();
  2049. nextToken();
  2050. // We can have macros or attributes in between 'class' and the class name.
  2051. if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
  2052. parseParens();
  2053. }
  2054. // Note that parsing away template declarations here leads to incorrectly
  2055. // accepting function declarations as record declarations.
  2056. // In general, we cannot solve this problem. Consider:
  2057. // class A<int> B() {}
  2058. // which can be a function definition or a class definition when B() is a
  2059. // macro. If we find enough real-world cases where this is a problem, we
  2060. // can parse for the 'template' keyword in the beginning of the statement,
  2061. // and thus rule out the record production in case there is no template
  2062. // (this would still leave us with an ambiguity between template function
  2063. // and class declarations).
  2064. if (FormatTok->isOneOf(tok::colon, tok::less)) {
  2065. while (!eof()) {
  2066. if (FormatTok->is(tok::l_brace)) {
  2067. calculateBraceTypes(/*ExpectClassBody=*/true);
  2068. if (!tryToParseBracedList())
  2069. break;
  2070. }
  2071. if (FormatTok->Tok.is(tok::semi))
  2072. return;
  2073. nextToken();
  2074. }
  2075. }
  2076. if (FormatTok->Tok.is(tok::l_brace)) {
  2077. if (ParseAsExpr) {
  2078. parseChildBlock();
  2079. } else {
  2080. if (ShouldBreakBeforeBrace(Style, InitialToken))
  2081. addUnwrappedLine();
  2082. parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
  2083. /*MunchSemi=*/false);
  2084. }
  2085. }
  2086. // There is no addUnwrappedLine() here so that we fall through to parsing a
  2087. // structural element afterwards. Thus, in "class A {} n, m;",
  2088. // "} n, m;" will end up in one unwrapped line.
  2089. }
  2090. void UnwrappedLineParser::parseObjCMethod() {
  2091. assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
  2092. "'(' or identifier expected.");
  2093. do {
  2094. if (FormatTok->Tok.is(tok::semi)) {
  2095. nextToken();
  2096. addUnwrappedLine();
  2097. return;
  2098. } else if (FormatTok->Tok.is(tok::l_brace)) {
  2099. if (Style.BraceWrapping.AfterFunction)
  2100. addUnwrappedLine();
  2101. parseBlock(/*MustBeDeclaration=*/false);
  2102. addUnwrappedLine();
  2103. return;
  2104. } else {
  2105. nextToken();
  2106. }
  2107. } while (!eof());
  2108. }
  2109. void UnwrappedLineParser::parseObjCProtocolList() {
  2110. assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
  2111. do {
  2112. nextToken();
  2113. // Early exit in case someone forgot a close angle.
  2114. if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
  2115. FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
  2116. return;
  2117. } while (!eof() && FormatTok->Tok.isNot(tok::greater));
  2118. nextToken(); // Skip '>'.
  2119. }
  2120. void UnwrappedLineParser::parseObjCUntilAtEnd() {
  2121. do {
  2122. if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
  2123. nextToken();
  2124. addUnwrappedLine();
  2125. break;
  2126. }
  2127. if (FormatTok->is(tok::l_brace)) {
  2128. parseBlock(/*MustBeDeclaration=*/false);
  2129. // In ObjC interfaces, nothing should be following the "}".
  2130. addUnwrappedLine();
  2131. } else if (FormatTok->is(tok::r_brace)) {
  2132. // Ignore stray "}". parseStructuralElement doesn't consume them.
  2133. nextToken();
  2134. addUnwrappedLine();
  2135. } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
  2136. nextToken();
  2137. parseObjCMethod();
  2138. } else {
  2139. parseStructuralElement();
  2140. }
  2141. } while (!eof());
  2142. }
  2143. void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  2144. assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
  2145. FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
  2146. nextToken();
  2147. nextToken(); // interface name
  2148. // @interface can be followed by a lightweight generic
  2149. // specialization list, then either a base class or a category.
  2150. if (FormatTok->Tok.is(tok::less)) {
  2151. // Unlike protocol lists, generic parameterizations support
  2152. // nested angles:
  2153. //
  2154. // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
  2155. // NSObject <NSCopying, NSSecureCoding>
  2156. //
  2157. // so we need to count how many open angles we have left.
  2158. unsigned NumOpenAngles = 1;
  2159. do {
  2160. nextToken();
  2161. // Early exit in case someone forgot a close angle.
  2162. if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
  2163. FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
  2164. break;
  2165. if (FormatTok->Tok.is(tok::less))
  2166. ++NumOpenAngles;
  2167. else if (FormatTok->Tok.is(tok::greater)) {
  2168. assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
  2169. --NumOpenAngles;
  2170. }
  2171. } while (!eof() && NumOpenAngles != 0);
  2172. nextToken(); // Skip '>'.
  2173. }
  2174. if (FormatTok->Tok.is(tok::colon)) {
  2175. nextToken();
  2176. nextToken(); // base class name
  2177. } else if (FormatTok->Tok.is(tok::l_paren))
  2178. // Skip category, if present.
  2179. parseParens();
  2180. if (FormatTok->Tok.is(tok::less))
  2181. parseObjCProtocolList();
  2182. if (FormatTok->Tok.is(tok::l_brace)) {
  2183. if (Style.BraceWrapping.AfterObjCDeclaration)
  2184. addUnwrappedLine();
  2185. parseBlock(/*MustBeDeclaration=*/true);
  2186. }
  2187. // With instance variables, this puts '}' on its own line. Without instance
  2188. // variables, this ends the @interface line.
  2189. addUnwrappedLine();
  2190. parseObjCUntilAtEnd();
  2191. }
  2192. // Returns true for the declaration/definition form of @protocol,
  2193. // false for the expression form.
  2194. bool UnwrappedLineParser::parseObjCProtocol() {
  2195. assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
  2196. nextToken();
  2197. if (FormatTok->is(tok::l_paren))
  2198. // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
  2199. return false;
  2200. // The definition/declaration form,
  2201. // @protocol Foo
  2202. // - (int)someMethod;
  2203. // @end
  2204. nextToken(); // protocol name
  2205. if (FormatTok->Tok.is(tok::less))
  2206. parseObjCProtocolList();
  2207. // Check for protocol declaration.
  2208. if (FormatTok->Tok.is(tok::semi)) {
  2209. nextToken();
  2210. addUnwrappedLine();
  2211. return true;
  2212. }
  2213. addUnwrappedLine();
  2214. parseObjCUntilAtEnd();
  2215. return true;
  2216. }
  2217. void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  2218. bool IsImport = FormatTok->is(Keywords.kw_import);
  2219. assert(IsImport || FormatTok->is(tok::kw_export));
  2220. nextToken();
  2221. // Consume the "default" in "export default class/function".
  2222. if (FormatTok->is(tok::kw_default))
  2223. nextToken();
  2224. // Consume "async function", "function" and "default function", so that these
  2225. // get parsed as free-standing JS functions, i.e. do not require a trailing
  2226. // semicolon.
  2227. if (FormatTok->is(Keywords.kw_async))
  2228. nextToken();
  2229. if (FormatTok->is(Keywords.kw_function)) {
  2230. nextToken();
  2231. return;
  2232. }
  2233. // For imports, `export *`, `export {...}`, consume the rest of the line up
  2234. // to the terminating `;`. For everything else, just return and continue
  2235. // parsing the structural element, i.e. the declaration or expression for
  2236. // `export default`.
  2237. if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
  2238. !FormatTok->isStringLiteral())
  2239. return;
  2240. while (!eof()) {
  2241. if (FormatTok->is(tok::semi))
  2242. return;
  2243. if (Line->Tokens.empty()) {
  2244. // Common issue: Automatic Semicolon Insertion wrapped the line, so the
  2245. // import statement should terminate.
  2246. return;
  2247. }
  2248. if (FormatTok->is(tok::l_brace)) {
  2249. FormatTok->BlockKind = BK_Block;
  2250. nextToken();
  2251. parseBracedList();
  2252. } else {
  2253. nextToken();
  2254. }
  2255. }
  2256. }
  2257. void UnwrappedLineParser::parseStatementMacro() {
  2258. nextToken();
  2259. if (FormatTok->is(tok::l_paren))
  2260. parseParens();
  2261. if (FormatTok->is(tok::semi))
  2262. nextToken();
  2263. addUnwrappedLine();
  2264. }
  2265. LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
  2266. StringRef Prefix = "") {
  2267. llvm::dbgs() << Prefix << "Line(" << Line.Level
  2268. << ", FSC=" << Line.FirstStartColumn << ")"
  2269. << (Line.InPPDirective ? " MACRO" : "") << ": ";
  2270. for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  2271. E = Line.Tokens.end();
  2272. I != E; ++I) {
  2273. llvm::dbgs() << I->Tok->Tok.getName() << "["
  2274. << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
  2275. << "] ";
  2276. }
  2277. for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  2278. E = Line.Tokens.end();
  2279. I != E; ++I) {
  2280. const UnwrappedLineNode &Node = *I;
  2281. for (SmallVectorImpl<UnwrappedLine>::const_iterator
  2282. I = Node.Children.begin(),
  2283. E = Node.Children.end();
  2284. I != E; ++I) {
  2285. printDebugInfo(*I, "\nChild: ");
  2286. }
  2287. }
  2288. llvm::dbgs() << "\n";
  2289. }
  2290. void UnwrappedLineParser::addUnwrappedLine() {
  2291. if (Line->Tokens.empty())
  2292. return;
  2293. LLVM_DEBUG({
  2294. if (CurrentLines == &Lines)
  2295. printDebugInfo(*Line);
  2296. });
  2297. CurrentLines->push_back(std::move(*Line));
  2298. Line->Tokens.clear();
  2299. Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  2300. Line->FirstStartColumn = 0;
  2301. if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
  2302. CurrentLines->append(
  2303. std::make_move_iterator(PreprocessorDirectives.begin()),
  2304. std::make_move_iterator(PreprocessorDirectives.end()));
  2305. PreprocessorDirectives.clear();
  2306. }
  2307. // Disconnect the current token from the last token on the previous line.
  2308. FormatTok->Previous = nullptr;
  2309. }
  2310. bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
  2311. bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
  2312. return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
  2313. FormatTok.NewlinesBefore > 0;
  2314. }
  2315. // Checks if \p FormatTok is a line comment that continues the line comment
  2316. // section on \p Line.
  2317. static bool continuesLineCommentSection(const FormatToken &FormatTok,
  2318. const UnwrappedLine &Line,
  2319. llvm::Regex &CommentPragmasRegex) {
  2320. if (Line.Tokens.empty())
  2321. return false;
  2322. StringRef IndentContent = FormatTok.TokenText;
  2323. if (FormatTok.TokenText.startswith("//") ||
  2324. FormatTok.TokenText.startswith("/*"))
  2325. IndentContent = FormatTok.TokenText.substr(2);
  2326. if (CommentPragmasRegex.match(IndentContent))
  2327. return false;
  2328. // If Line starts with a line comment, then FormatTok continues the comment
  2329. // section if its original column is greater or equal to the original start
  2330. // column of the line.
  2331. //
  2332. // Define the min column token of a line as follows: if a line ends in '{' or
  2333. // contains a '{' followed by a line comment, then the min column token is
  2334. // that '{'. Otherwise, the min column token of the line is the first token of
  2335. // the line.
  2336. //
  2337. // If Line starts with a token other than a line comment, then FormatTok
  2338. // continues the comment section if its original column is greater than the
  2339. // original start column of the min column token of the line.
  2340. //
  2341. // For example, the second line comment continues the first in these cases:
  2342. //
  2343. // // first line
  2344. // // second line
  2345. //
  2346. // and:
  2347. //
  2348. // // first line
  2349. // // second line
  2350. //
  2351. // and:
  2352. //
  2353. // int i; // first line
  2354. // // second line
  2355. //
  2356. // and:
  2357. //
  2358. // do { // first line
  2359. // // second line
  2360. // int i;
  2361. // } while (true);
  2362. //
  2363. // and:
  2364. //
  2365. // enum {
  2366. // a, // first line
  2367. // // second line
  2368. // b
  2369. // };
  2370. //
  2371. // The second line comment doesn't continue the first in these cases:
  2372. //
  2373. // // first line
  2374. // // second line
  2375. //
  2376. // and:
  2377. //
  2378. // int i; // first line
  2379. // // second line
  2380. //
  2381. // and:
  2382. //
  2383. // do { // first line
  2384. // // second line
  2385. // int i;
  2386. // } while (true);
  2387. //
  2388. // and:
  2389. //
  2390. // enum {
  2391. // a, // first line
  2392. // // second line
  2393. // };
  2394. const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
  2395. // Scan for '{//'. If found, use the column of '{' as a min column for line
  2396. // comment section continuation.
  2397. const FormatToken *PreviousToken = nullptr;
  2398. for (const UnwrappedLineNode &Node : Line.Tokens) {
  2399. if (PreviousToken && PreviousToken->is(tok::l_brace) &&
  2400. isLineComment(*Node.Tok)) {
  2401. MinColumnToken = PreviousToken;
  2402. break;
  2403. }
  2404. PreviousToken = Node.Tok;
  2405. // Grab the last newline preceding a token in this unwrapped line.
  2406. if (Node.Tok->NewlinesBefore > 0) {
  2407. MinColumnToken = Node.Tok;
  2408. }
  2409. }
  2410. if (PreviousToken && PreviousToken->is(tok::l_brace)) {
  2411. MinColumnToken = PreviousToken;
  2412. }
  2413. return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
  2414. MinColumnToken);
  2415. }
  2416. void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
  2417. bool JustComments = Line->Tokens.empty();
  2418. for (SmallVectorImpl<FormatToken *>::const_iterator
  2419. I = CommentsBeforeNextToken.begin(),
  2420. E = CommentsBeforeNextToken.end();
  2421. I != E; ++I) {
  2422. // Line comments that belong to the same line comment section are put on the
  2423. // same line since later we might want to reflow content between them.
  2424. // Additional fine-grained breaking of line comment sections is controlled
  2425. // by the class BreakableLineCommentSection in case it is desirable to keep
  2426. // several line comment sections in the same unwrapped line.
  2427. //
  2428. // FIXME: Consider putting separate line comment sections as children to the
  2429. // unwrapped line instead.
  2430. (*I)->ContinuesLineCommentSection =
  2431. continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
  2432. if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
  2433. addUnwrappedLine();
  2434. pushToken(*I);
  2435. }
  2436. if (NewlineBeforeNext && JustComments)
  2437. addUnwrappedLine();
  2438. CommentsBeforeNextToken.clear();
  2439. }
  2440. void UnwrappedLineParser::nextToken(int LevelDifference) {
  2441. if (eof())
  2442. return;
  2443. flushComments(isOnNewLine(*FormatTok));
  2444. pushToken(FormatTok);
  2445. FormatToken *Previous = FormatTok;
  2446. if (Style.Language != FormatStyle::LK_JavaScript)
  2447. readToken(LevelDifference);
  2448. else
  2449. readTokenWithJavaScriptASI();
  2450. FormatTok->Previous = Previous;
  2451. }
  2452. void UnwrappedLineParser::distributeComments(
  2453. const SmallVectorImpl<FormatToken *> &Comments,
  2454. const FormatToken *NextTok) {
  2455. // Whether or not a line comment token continues a line is controlled by
  2456. // the method continuesLineCommentSection, with the following caveat:
  2457. //
  2458. // Define a trail of Comments to be a nonempty proper postfix of Comments such
  2459. // that each comment line from the trail is aligned with the next token, if
  2460. // the next token exists. If a trail exists, the beginning of the maximal
  2461. // trail is marked as a start of a new comment section.
  2462. //
  2463. // For example in this code:
  2464. //
  2465. // int a; // line about a
  2466. // // line 1 about b
  2467. // // line 2 about b
  2468. // int b;
  2469. //
  2470. // the two lines about b form a maximal trail, so there are two sections, the
  2471. // first one consisting of the single comment "// line about a" and the
  2472. // second one consisting of the next two comments.
  2473. if (Comments.empty())
  2474. return;
  2475. bool ShouldPushCommentsInCurrentLine = true;
  2476. bool HasTrailAlignedWithNextToken = false;
  2477. unsigned StartOfTrailAlignedWithNextToken = 0;
  2478. if (NextTok) {
  2479. // We are skipping the first element intentionally.
  2480. for (unsigned i = Comments.size() - 1; i > 0; --i) {
  2481. if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
  2482. HasTrailAlignedWithNextToken = true;
  2483. StartOfTrailAlignedWithNextToken = i;
  2484. }
  2485. }
  2486. }
  2487. for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
  2488. FormatToken *FormatTok = Comments[i];
  2489. if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
  2490. FormatTok->ContinuesLineCommentSection = false;
  2491. } else {
  2492. FormatTok->ContinuesLineCommentSection =
  2493. continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
  2494. }
  2495. if (!FormatTok->ContinuesLineCommentSection &&
  2496. (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
  2497. ShouldPushCommentsInCurrentLine = false;
  2498. }
  2499. if (ShouldPushCommentsInCurrentLine) {
  2500. pushToken(FormatTok);
  2501. } else {
  2502. CommentsBeforeNextToken.push_back(FormatTok);
  2503. }
  2504. }
  2505. }
  2506. void UnwrappedLineParser::readToken(int LevelDifference) {
  2507. SmallVector<FormatToken *, 1> Comments;
  2508. do {
  2509. FormatTok = Tokens->getNextToken();
  2510. assert(FormatTok);
  2511. while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
  2512. (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
  2513. distributeComments(Comments, FormatTok);
  2514. Comments.clear();
  2515. // If there is an unfinished unwrapped line, we flush the preprocessor
  2516. // directives only after that unwrapped line was finished later.
  2517. bool SwitchToPreprocessorLines = !Line->Tokens.empty();
  2518. ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
  2519. assert((LevelDifference >= 0 ||
  2520. static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
  2521. "LevelDifference makes Line->Level negative");
  2522. Line->Level += LevelDifference;
  2523. // Comments stored before the preprocessor directive need to be output
  2524. // before the preprocessor directive, at the same level as the
  2525. // preprocessor directive, as we consider them to apply to the directive.
  2526. if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
  2527. PPBranchLevel > 0)
  2528. Line->Level += PPBranchLevel;
  2529. flushComments(isOnNewLine(*FormatTok));
  2530. parsePPDirective();
  2531. }
  2532. while (FormatTok->Type == TT_ConflictStart ||
  2533. FormatTok->Type == TT_ConflictEnd ||
  2534. FormatTok->Type == TT_ConflictAlternative) {
  2535. if (FormatTok->Type == TT_ConflictStart) {
  2536. conditionalCompilationStart(/*Unreachable=*/false);
  2537. } else if (FormatTok->Type == TT_ConflictAlternative) {
  2538. conditionalCompilationAlternative();
  2539. } else if (FormatTok->Type == TT_ConflictEnd) {
  2540. conditionalCompilationEnd();
  2541. }
  2542. FormatTok = Tokens->getNextToken();
  2543. FormatTok->MustBreakBefore = true;
  2544. }
  2545. if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
  2546. !Line->InPPDirective) {
  2547. continue;
  2548. }
  2549. if (!FormatTok->Tok.is(tok::comment)) {
  2550. distributeComments(Comments, FormatTok);
  2551. Comments.clear();
  2552. return;
  2553. }
  2554. Comments.push_back(FormatTok);
  2555. } while (!eof());
  2556. distributeComments(Comments, nullptr);
  2557. Comments.clear();
  2558. }
  2559. void UnwrappedLineParser::pushToken(FormatToken *Tok) {
  2560. Line->Tokens.push_back(UnwrappedLineNode(Tok));
  2561. if (MustBreakBeforeNextToken) {
  2562. Line->Tokens.back().Tok->MustBreakBefore = true;
  2563. MustBreakBeforeNextToken = false;
  2564. }
  2565. }
  2566. } // end namespace format
  2567. } // end namespace clang