UnwrappedLineParser.cpp 79 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515
  1. //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. ///
  10. /// \file
  11. /// \brief This file contains the implementation of the UnwrappedLineParser,
  12. /// which turns a stream of tokens into UnwrappedLines.
  13. ///
  14. //===----------------------------------------------------------------------===//
  15. #include "UnwrappedLineParser.h"
  16. #include "llvm/ADT/STLExtras.h"
  17. #include "llvm/Support/Debug.h"
  18. #include "llvm/Support/raw_ostream.h"
  19. #define DEBUG_TYPE "format-parser"
  20. namespace clang {
  21. namespace format {
  22. class FormatTokenSource {
  23. public:
  24. virtual ~FormatTokenSource() {}
  25. virtual FormatToken *getNextToken() = 0;
  26. virtual unsigned getPosition() = 0;
  27. virtual FormatToken *setPosition(unsigned Position) = 0;
  28. };
  29. namespace {
  30. class ScopedDeclarationState {
  31. public:
  32. ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  33. bool MustBeDeclaration)
  34. : Line(Line), Stack(Stack) {
  35. Line.MustBeDeclaration = MustBeDeclaration;
  36. Stack.push_back(MustBeDeclaration);
  37. }
  38. ~ScopedDeclarationState() {
  39. Stack.pop_back();
  40. if (!Stack.empty())
  41. Line.MustBeDeclaration = Stack.back();
  42. else
  43. Line.MustBeDeclaration = true;
  44. }
  45. private:
  46. UnwrappedLine &Line;
  47. std::vector<bool> &Stack;
  48. };
  49. static bool isLineComment(const FormatToken &FormatTok) {
  50. return FormatTok.is(tok::comment) && FormatTok.TokenText.startswith("//");
  51. }
  52. // Checks if \p FormatTok is a line comment that continues the line comment
  53. // \p Previous. The original column of \p MinColumnToken is used to determine
  54. // whether \p FormatTok is indented enough to the right to continue \p Previous.
  55. static bool continuesLineComment(const FormatToken &FormatTok,
  56. const FormatToken *Previous,
  57. const FormatToken *MinColumnToken) {
  58. if (!Previous || !MinColumnToken)
  59. return false;
  60. unsigned MinContinueColumn =
  61. MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  62. return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
  63. isLineComment(*Previous) &&
  64. FormatTok.OriginalColumn >= MinContinueColumn;
  65. }
  66. class ScopedMacroState : public FormatTokenSource {
  67. public:
  68. ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  69. FormatToken *&ResetToken)
  70. : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  71. PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  72. Token(nullptr), PreviousToken(nullptr) {
  73. TokenSource = this;
  74. Line.Level = 0;
  75. Line.InPPDirective = true;
  76. }
  77. ~ScopedMacroState() override {
  78. TokenSource = PreviousTokenSource;
  79. ResetToken = Token;
  80. Line.InPPDirective = false;
  81. Line.Level = PreviousLineLevel;
  82. }
  83. FormatToken *getNextToken() override {
  84. // The \c UnwrappedLineParser guards against this by never calling
  85. // \c getNextToken() after it has encountered the first eof token.
  86. assert(!eof());
  87. PreviousToken = Token;
  88. Token = PreviousTokenSource->getNextToken();
  89. if (eof())
  90. return getFakeEOF();
  91. return Token;
  92. }
  93. unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  94. FormatToken *setPosition(unsigned Position) override {
  95. PreviousToken = nullptr;
  96. Token = PreviousTokenSource->setPosition(Position);
  97. return Token;
  98. }
  99. private:
  100. bool eof() {
  101. return Token && Token->HasUnescapedNewline &&
  102. !continuesLineComment(*Token, PreviousToken,
  103. /*MinColumnToken=*/PreviousToken);
  104. }
  105. FormatToken *getFakeEOF() {
  106. static bool EOFInitialized = false;
  107. static FormatToken FormatTok;
  108. if (!EOFInitialized) {
  109. FormatTok.Tok.startToken();
  110. FormatTok.Tok.setKind(tok::eof);
  111. EOFInitialized = true;
  112. }
  113. return &FormatTok;
  114. }
  115. UnwrappedLine &Line;
  116. FormatTokenSource *&TokenSource;
  117. FormatToken *&ResetToken;
  118. unsigned PreviousLineLevel;
  119. FormatTokenSource *PreviousTokenSource;
  120. FormatToken *Token;
  121. FormatToken *PreviousToken;
  122. };
  123. } // end anonymous namespace
  124. class ScopedLineState {
  125. public:
  126. ScopedLineState(UnwrappedLineParser &Parser,
  127. bool SwitchToPreprocessorLines = false)
  128. : Parser(Parser), OriginalLines(Parser.CurrentLines) {
  129. if (SwitchToPreprocessorLines)
  130. Parser.CurrentLines = &Parser.PreprocessorDirectives;
  131. else if (!Parser.Line->Tokens.empty())
  132. Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
  133. PreBlockLine = std::move(Parser.Line);
  134. Parser.Line = llvm::make_unique<UnwrappedLine>();
  135. Parser.Line->Level = PreBlockLine->Level;
  136. Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
  137. }
  138. ~ScopedLineState() {
  139. if (!Parser.Line->Tokens.empty()) {
  140. Parser.addUnwrappedLine();
  141. }
  142. assert(Parser.Line->Tokens.empty());
  143. Parser.Line = std::move(PreBlockLine);
  144. if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
  145. Parser.MustBreakBeforeNextToken = true;
  146. Parser.CurrentLines = OriginalLines;
  147. }
  148. private:
  149. UnwrappedLineParser &Parser;
  150. std::unique_ptr<UnwrappedLine> PreBlockLine;
  151. SmallVectorImpl<UnwrappedLine> *OriginalLines;
  152. };
  153. class CompoundStatementIndenter {
  154. public:
  155. CompoundStatementIndenter(UnwrappedLineParser *Parser,
  156. const FormatStyle &Style, unsigned &LineLevel)
  157. : LineLevel(LineLevel), OldLineLevel(LineLevel) {
  158. if (Style.BraceWrapping.AfterControlStatement)
  159. Parser->addUnwrappedLine();
  160. if (Style.BraceWrapping.IndentBraces)
  161. ++LineLevel;
  162. }
  163. ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
  164. private:
  165. unsigned &LineLevel;
  166. unsigned OldLineLevel;
  167. };
  168. namespace {
  169. class IndexedTokenSource : public FormatTokenSource {
  170. public:
  171. IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
  172. : Tokens(Tokens), Position(-1) {}
  173. FormatToken *getNextToken() override {
  174. ++Position;
  175. return Tokens[Position];
  176. }
  177. unsigned getPosition() override {
  178. assert(Position >= 0);
  179. return Position;
  180. }
  181. FormatToken *setPosition(unsigned P) override {
  182. Position = P;
  183. return Tokens[Position];
  184. }
  185. void reset() { Position = -1; }
  186. private:
  187. ArrayRef<FormatToken *> Tokens;
  188. int Position;
  189. };
  190. } // end anonymous namespace
  191. UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
  192. const AdditionalKeywords &Keywords,
  193. unsigned FirstStartColumn,
  194. ArrayRef<FormatToken *> Tokens,
  195. UnwrappedLineConsumer &Callback)
  196. : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
  197. CurrentLines(&Lines), Style(Style), Keywords(Keywords),
  198. CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
  199. Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
  200. IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
  201. IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) {}
  202. void UnwrappedLineParser::reset() {
  203. PPBranchLevel = -1;
  204. IfNdefCondition = nullptr;
  205. FoundIncludeGuardStart = false;
  206. IncludeGuardRejected = false;
  207. Line.reset(new UnwrappedLine);
  208. CommentsBeforeNextToken.clear();
  209. FormatTok = nullptr;
  210. MustBreakBeforeNextToken = false;
  211. PreprocessorDirectives.clear();
  212. CurrentLines = &Lines;
  213. DeclarationScopeStack.clear();
  214. PPStack.clear();
  215. Line->FirstStartColumn = FirstStartColumn;
  216. }
  217. void UnwrappedLineParser::parse() {
  218. IndexedTokenSource TokenSource(AllTokens);
  219. Line->FirstStartColumn = FirstStartColumn;
  220. do {
  221. DEBUG(llvm::dbgs() << "----\n");
  222. reset();
  223. Tokens = &TokenSource;
  224. TokenSource.reset();
  225. readToken();
  226. parseFile();
  227. // Create line with eof token.
  228. pushToken(FormatTok);
  229. addUnwrappedLine();
  230. for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
  231. E = Lines.end();
  232. I != E; ++I) {
  233. Callback.consumeUnwrappedLine(*I);
  234. }
  235. Callback.finishRun();
  236. Lines.clear();
  237. while (!PPLevelBranchIndex.empty() &&
  238. PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
  239. PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
  240. PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
  241. }
  242. if (!PPLevelBranchIndex.empty()) {
  243. ++PPLevelBranchIndex.back();
  244. assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
  245. assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
  246. }
  247. } while (!PPLevelBranchIndex.empty());
  248. }
  249. void UnwrappedLineParser::parseFile() {
  250. // The top-level context in a file always has declarations, except for pre-
  251. // processor directives and JavaScript files.
  252. bool MustBeDeclaration =
  253. !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
  254. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  255. MustBeDeclaration);
  256. if (Style.Language == FormatStyle::LK_TextProto)
  257. parseBracedList();
  258. else
  259. parseLevel(/*HasOpeningBrace=*/false);
  260. // Make sure to format the remaining tokens.
  261. flushComments(true);
  262. addUnwrappedLine();
  263. }
  264. void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  265. bool SwitchLabelEncountered = false;
  266. do {
  267. tok::TokenKind kind = FormatTok->Tok.getKind();
  268. if (FormatTok->Type == TT_MacroBlockBegin) {
  269. kind = tok::l_brace;
  270. } else if (FormatTok->Type == TT_MacroBlockEnd) {
  271. kind = tok::r_brace;
  272. }
  273. switch (kind) {
  274. case tok::comment:
  275. nextToken();
  276. addUnwrappedLine();
  277. break;
  278. case tok::l_brace:
  279. // FIXME: Add parameter whether this can happen - if this happens, we must
  280. // be in a non-declaration context.
  281. if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
  282. continue;
  283. parseBlock(/*MustBeDeclaration=*/false);
  284. addUnwrappedLine();
  285. break;
  286. case tok::r_brace:
  287. if (HasOpeningBrace)
  288. return;
  289. nextToken();
  290. addUnwrappedLine();
  291. break;
  292. case tok::kw_default:
  293. case tok::kw_case:
  294. if (Style.Language == FormatStyle::LK_JavaScript &&
  295. Line->MustBeDeclaration) {
  296. // A 'case: string' style field declaration.
  297. parseStructuralElement();
  298. break;
  299. }
  300. if (!SwitchLabelEncountered &&
  301. (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
  302. ++Line->Level;
  303. SwitchLabelEncountered = true;
  304. parseStructuralElement();
  305. break;
  306. default:
  307. parseStructuralElement();
  308. break;
  309. }
  310. } while (!eof());
  311. }
  312. void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  313. // We'll parse forward through the tokens until we hit
  314. // a closing brace or eof - note that getNextToken() will
  315. // parse macros, so this will magically work inside macro
  316. // definitions, too.
  317. unsigned StoredPosition = Tokens->getPosition();
  318. FormatToken *Tok = FormatTok;
  319. const FormatToken *PrevTok = Tok->Previous;
  320. // Keep a stack of positions of lbrace tokens. We will
  321. // update information about whether an lbrace starts a
  322. // braced init list or a different block during the loop.
  323. SmallVector<FormatToken *, 8> LBraceStack;
  324. assert(Tok->Tok.is(tok::l_brace));
  325. do {
  326. // Get next non-comment token.
  327. FormatToken *NextTok;
  328. unsigned ReadTokens = 0;
  329. do {
  330. NextTok = Tokens->getNextToken();
  331. ++ReadTokens;
  332. } while (NextTok->is(tok::comment));
  333. switch (Tok->Tok.getKind()) {
  334. case tok::l_brace:
  335. if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
  336. if (PrevTok->is(tok::colon))
  337. // A colon indicates this code is in a type, or a braced list
  338. // following a label in an object literal ({a: {b: 1}}). The code
  339. // below could be confused by semicolons between the individual
  340. // members in a type member list, which would normally trigger
  341. // BK_Block. In both cases, this must be parsed as an inline braced
  342. // init.
  343. Tok->BlockKind = BK_BracedInit;
  344. else if (PrevTok->is(tok::r_paren))
  345. // `) { }` can only occur in function or method declarations in JS.
  346. Tok->BlockKind = BK_Block;
  347. } else {
  348. Tok->BlockKind = BK_Unknown;
  349. }
  350. LBraceStack.push_back(Tok);
  351. break;
  352. case tok::r_brace:
  353. if (LBraceStack.empty())
  354. break;
  355. if (LBraceStack.back()->BlockKind == BK_Unknown) {
  356. bool ProbablyBracedList = false;
  357. if (Style.Language == FormatStyle::LK_Proto) {
  358. ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
  359. } else {
  360. // Using OriginalColumn to distinguish between ObjC methods and
  361. // binary operators is a bit hacky.
  362. bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
  363. NextTok->OriginalColumn == 0;
  364. // If there is a comma, semicolon or right paren after the closing
  365. // brace, we assume this is a braced initializer list. Note that
  366. // regardless how we mark inner braces here, we will overwrite the
  367. // BlockKind later if we parse a braced list (where all blocks
  368. // inside are by default braced lists), or when we explicitly detect
  369. // blocks (for example while parsing lambdas).
  370. // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
  371. // braced list in JS.
  372. ProbablyBracedList =
  373. (Style.Language == FormatStyle::LK_JavaScript &&
  374. NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
  375. Keywords.kw_as)) ||
  376. (Style.isCpp() && NextTok->is(tok::l_paren)) ||
  377. NextTok->isOneOf(tok::comma, tok::period, tok::colon,
  378. tok::r_paren, tok::r_square, tok::l_brace,
  379. tok::l_square, tok::ellipsis) ||
  380. (NextTok->is(tok::identifier) &&
  381. !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
  382. (NextTok->is(tok::semi) &&
  383. (!ExpectClassBody || LBraceStack.size() != 1)) ||
  384. (NextTok->isBinaryOperator() && !NextIsObjCMethod);
  385. }
  386. if (ProbablyBracedList) {
  387. Tok->BlockKind = BK_BracedInit;
  388. LBraceStack.back()->BlockKind = BK_BracedInit;
  389. } else {
  390. Tok->BlockKind = BK_Block;
  391. LBraceStack.back()->BlockKind = BK_Block;
  392. }
  393. }
  394. LBraceStack.pop_back();
  395. break;
  396. case tok::at:
  397. case tok::semi:
  398. case tok::kw_if:
  399. case tok::kw_while:
  400. case tok::kw_for:
  401. case tok::kw_switch:
  402. case tok::kw_try:
  403. case tok::kw___try:
  404. if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
  405. LBraceStack.back()->BlockKind = BK_Block;
  406. break;
  407. default:
  408. break;
  409. }
  410. PrevTok = Tok;
  411. Tok = NextTok;
  412. } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
  413. // Assume other blocks for all unclosed opening braces.
  414. for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
  415. if (LBraceStack[i]->BlockKind == BK_Unknown)
  416. LBraceStack[i]->BlockKind = BK_Block;
  417. }
  418. FormatTok = Tokens->setPosition(StoredPosition);
  419. }
  /// Mixes the hash of \p v into \p seed in place.
  ///
  /// The value hash is offset by the constant 0x9e3779b9 and by two shifted
  /// copies of the old seed before being xor-ed into it.
  template <class T>
  static inline void hash_combine(std::size_t &seed, const T &v) {
    const std::size_t Mixed =
        std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    seed ^= Mixed;
  }
  425. size_t UnwrappedLineParser::computePPHash() const {
  426. size_t h = 0;
  427. for (const auto &i : PPStack) {
  428. hash_combine(h, size_t(i.Kind));
  429. hash_combine(h, i.Line);
  430. }
  431. return h;
  432. }
  433. void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
  434. bool MunchSemi) {
  435. assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
  436. "'{' or macro block token expected");
  437. const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  438. FormatTok->BlockKind = BK_Block;
  439. size_t PPStartHash = computePPHash();
  440. unsigned InitialLevel = Line->Level;
  441. nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
  442. if (MacroBlock && FormatTok->is(tok::l_paren))
  443. parseParens();
  444. size_t NbPreprocessorDirectives =
  445. CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  446. addUnwrappedLine();
  447. size_t OpeningLineIndex =
  448. CurrentLines->empty()
  449. ? (UnwrappedLine::kInvalidIndex)
  450. : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
  451. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  452. MustBeDeclaration);
  453. if (AddLevel)
  454. ++Line->Level;
  455. parseLevel(/*HasOpeningBrace=*/true);
  456. if (eof())
  457. return;
  458. if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
  459. : !FormatTok->is(tok::r_brace)) {
  460. Line->Level = InitialLevel;
  461. FormatTok->BlockKind = BK_Block;
  462. return;
  463. }
  464. size_t PPEndHash = computePPHash();
  465. // Munch the closing brace.
  466. nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
  467. if (MacroBlock && FormatTok->is(tok::l_paren))
  468. parseParens();
  469. if (MunchSemi && FormatTok->Tok.is(tok::semi))
  470. nextToken();
  471. Line->Level = InitialLevel;
  472. if (PPStartHash == PPEndHash) {
  473. Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
  474. if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
  475. // Update the opening line to add the forward reference as well
  476. (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
  477. CurrentLines->size() - 1;
  478. }
  479. }
  480. }
  481. static bool isGoogScope(const UnwrappedLine &Line) {
  482. // FIXME: Closure-library specific stuff should not be hard-coded but be
  483. // configurable.
  484. if (Line.Tokens.size() < 4)
  485. return false;
  486. auto I = Line.Tokens.begin();
  487. if (I->Tok->TokenText != "goog")
  488. return false;
  489. ++I;
  490. if (I->Tok->isNot(tok::period))
  491. return false;
  492. ++I;
  493. if (I->Tok->TokenText != "scope")
  494. return false;
  495. ++I;
  496. return I->Tok->is(tok::l_paren);
  497. }
  498. static bool isIIFE(const UnwrappedLine &Line,
  499. const AdditionalKeywords &Keywords) {
  500. // Look for the start of an immediately invoked anonymous function.
  501. // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  502. // This is commonly done in JavaScript to create a new, anonymous scope.
  503. // Example: (function() { ... })()
  504. if (Line.Tokens.size() < 3)
  505. return false;
  506. auto I = Line.Tokens.begin();
  507. if (I->Tok->isNot(tok::l_paren))
  508. return false;
  509. ++I;
  510. if (I->Tok->isNot(Keywords.kw_function))
  511. return false;
  512. ++I;
  513. return I->Tok->is(tok::l_paren);
  514. }
  515. static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
  516. const FormatToken &InitialToken) {
  517. if (InitialToken.is(tok::kw_namespace))
  518. return Style.BraceWrapping.AfterNamespace;
  519. if (InitialToken.is(tok::kw_class))
  520. return Style.BraceWrapping.AfterClass;
  521. if (InitialToken.is(tok::kw_union))
  522. return Style.BraceWrapping.AfterUnion;
  523. if (InitialToken.is(tok::kw_struct))
  524. return Style.BraceWrapping.AfterStruct;
  525. return false;
  526. }
  527. void UnwrappedLineParser::parseChildBlock() {
  528. FormatTok->BlockKind = BK_Block;
  529. nextToken();
  530. {
  531. bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
  532. (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
  533. ScopedLineState LineState(*this);
  534. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  535. /*MustBeDeclaration=*/false);
  536. Line->Level += SkipIndent ? 0 : 1;
  537. parseLevel(/*HasOpeningBrace=*/true);
  538. flushComments(isOnNewLine(*FormatTok));
  539. Line->Level -= SkipIndent ? 0 : 1;
  540. }
  541. nextToken();
  542. }
  543. void UnwrappedLineParser::parsePPDirective() {
  544. assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
  545. ScopedMacroState MacroState(*Line, Tokens, FormatTok);
  546. nextToken();
  547. if (!FormatTok->Tok.getIdentifierInfo()) {
  548. parsePPUnknown();
  549. return;
  550. }
  551. switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  552. case tok::pp_define:
  553. parsePPDefine();
  554. return;
  555. case tok::pp_if:
  556. parsePPIf(/*IfDef=*/false);
  557. break;
  558. case tok::pp_ifdef:
  559. case tok::pp_ifndef:
  560. parsePPIf(/*IfDef=*/true);
  561. break;
  562. case tok::pp_else:
  563. parsePPElse();
  564. break;
  565. case tok::pp_elif:
  566. parsePPElIf();
  567. break;
  568. case tok::pp_endif:
  569. parsePPEndIf();
  570. break;
  571. default:
  572. parsePPUnknown();
  573. break;
  574. }
  575. }
  576. void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  577. size_t Line = CurrentLines->size();
  578. if (CurrentLines == &PreprocessorDirectives)
  579. Line += Lines.size();
  580. if (Unreachable ||
  581. (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
  582. PPStack.push_back({PP_Unreachable, Line});
  583. else
  584. PPStack.push_back({PP_Conditional, Line});
  585. }
  586. void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  587. ++PPBranchLevel;
  588. assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  589. if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
  590. PPLevelBranchIndex.push_back(0);
  591. PPLevelBranchCount.push_back(0);
  592. }
  593. PPChainBranchIndex.push(0);
  594. bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  595. conditionalCompilationCondition(Unreachable || Skip);
  596. }
  597. void UnwrappedLineParser::conditionalCompilationAlternative() {
  598. if (!PPStack.empty())
  599. PPStack.pop_back();
  600. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  601. if (!PPChainBranchIndex.empty())
  602. ++PPChainBranchIndex.top();
  603. conditionalCompilationCondition(
  604. PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
  605. PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
  606. }
  607. void UnwrappedLineParser::conditionalCompilationEnd() {
  608. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  609. if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
  610. if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
  611. PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  612. }
  613. }
  614. // Guard against #endif's without #if.
  615. if (PPBranchLevel > -1)
  616. --PPBranchLevel;
  617. if (!PPChainBranchIndex.empty())
  618. PPChainBranchIndex.pop();
  619. if (!PPStack.empty())
  620. PPStack.pop_back();
  621. }
  622. void UnwrappedLineParser::parsePPIf(bool IfDef) {
  623. bool IfNDef = FormatTok->is(tok::pp_ifndef);
  624. nextToken();
  625. bool Unreachable = false;
  626. if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
  627. Unreachable = true;
  628. if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
  629. Unreachable = true;
  630. conditionalCompilationStart(Unreachable);
  631. FormatToken *IfCondition = FormatTok;
  632. // If there's a #ifndef on the first line, and the only lines before it are
  633. // comments, it could be an include guard.
  634. bool MaybeIncludeGuard = IfNDef;
  635. if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
  636. for (auto &Line : Lines) {
  637. if (!Line.Tokens.front().Tok->is(tok::comment)) {
  638. MaybeIncludeGuard = false;
  639. IncludeGuardRejected = true;
  640. break;
  641. }
  642. }
  643. }
  644. --PPBranchLevel;
  645. parsePPUnknown();
  646. ++PPBranchLevel;
  647. if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
  648. IfNdefCondition = IfCondition;
  649. }
  650. void UnwrappedLineParser::parsePPElse() {
  651. // If a potential include guard has an #else, it's not an include guard.
  652. if (FoundIncludeGuardStart && PPBranchLevel == 0)
  653. FoundIncludeGuardStart = false;
  654. conditionalCompilationAlternative();
  655. if (PPBranchLevel > -1)
  656. --PPBranchLevel;
  657. parsePPUnknown();
  658. ++PPBranchLevel;
  659. }
  660. void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
  661. void UnwrappedLineParser::parsePPEndIf() {
  662. conditionalCompilationEnd();
  663. parsePPUnknown();
  664. // If the #endif of a potential include guard is the last thing in the file,
  665. // then we count it as a real include guard and subtract one from every
  666. // preprocessor indent.
  667. unsigned TokenPosition = Tokens->getPosition();
  668. FormatToken *PeekNext = AllTokens[TokenPosition];
  669. if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&
  670. Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  671. for (auto &Line : Lines)
  672. if (Line.InPPDirective && Line.Level > 0)
  673. --Line.Level;
  674. }
  675. void UnwrappedLineParser::parsePPDefine() {
  676. nextToken();
  677. if (FormatTok->Tok.getKind() != tok::identifier) {
  678. parsePPUnknown();
  679. return;
  680. }
  681. if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
  682. FoundIncludeGuardStart = true;
  683. for (auto &Line : Lines) {
  684. if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
  685. FoundIncludeGuardStart = false;
  686. break;
  687. }
  688. }
  689. }
  690. IfNdefCondition = nullptr;
  691. nextToken();
  692. if (FormatTok->Tok.getKind() == tok::l_paren &&
  693. FormatTok->WhitespaceRange.getBegin() ==
  694. FormatTok->WhitespaceRange.getEnd()) {
  695. parseParens();
  696. }
  697. if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
  698. Line->Level += PPBranchLevel + 1;
  699. addUnwrappedLine();
  700. ++Line->Level;
  701. // Errors during a preprocessor directive can only affect the layout of the
  702. // preprocessor directive, and thus we ignore them. An alternative approach
  703. // would be to use the same approach we use on the file level (no
  704. // re-indentation if there was a structural error) within the macro
  705. // definition.
  706. parseFile();
  707. }
  708. void UnwrappedLineParser::parsePPUnknown() {
  709. do {
  710. nextToken();
  711. } while (!eof());
  712. if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
  713. Line->Level += PPBranchLevel + 1;
  714. addUnwrappedLine();
  715. IfNdefCondition = nullptr;
  716. }
  717. // Here we blacklist certain tokens that are not usually the first token in an
  718. // unwrapped line. This is used in attempt to distinguish macro calls without
  719. // trailing semicolons from other constructs split to several lines.
  720. static bool tokenCanStartNewLine(const clang::Token &Tok) {
  721. // Semicolon can be a null-statement, l_square can be a start of a macro or
  722. // a C++11 attribute, but this doesn't seem to be common.
  723. return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
  724. Tok.isNot(tok::l_square) &&
  725. // Tokens that can only be used as binary operators and a part of
  726. // overloaded operator names.
  727. Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
  728. Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
  729. Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
  730. Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
  731. Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
  732. Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
  733. Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
  734. Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
  735. Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
  736. Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
  737. Tok.isNot(tok::lesslessequal) &&
  738. // Colon is used in labels, base class lists, initializer lists,
  739. // range-based for loops, ternary operator, but should never be the
  740. // first token in an unwrapped line.
  741. Tok.isNot(tok::colon) &&
  742. // 'noexcept' is a trailing annotation.
  743. Tok.isNot(tok::kw_noexcept);
  744. }
  745. static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
  746. const FormatToken *FormatTok) {
  747. // FIXME: This returns true for C/C++ keywords like 'struct'.
  748. return FormatTok->is(tok::identifier) &&
  749. (FormatTok->Tok.getIdentifierInfo() == nullptr ||
  750. !FormatTok->isOneOf(
  751. Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
  752. Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
  753. Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
  754. Keywords.kw_let, Keywords.kw_var, tok::kw_const,
  755. Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
  756. Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
  757. Keywords.kw_from));
  758. }
  759. static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
  760. const FormatToken *FormatTok) {
  761. return FormatTok->Tok.isLiteral() ||
  762. FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
  763. mustBeJSIdent(Keywords, FormatTok);
  764. }
  765. // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
  766. // when encountered after a value (see mustBeJSIdentOrValue).
  767. static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
  768. const FormatToken *FormatTok) {
  769. return FormatTok->isOneOf(
  770. tok::kw_return, Keywords.kw_yield,
  771. // conditionals
  772. tok::kw_if, tok::kw_else,
  773. // loops
  774. tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
  775. // switch/case
  776. tok::kw_switch, tok::kw_case,
  777. // exceptions
  778. tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
  779. // declaration
  780. tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
  781. Keywords.kw_async, Keywords.kw_function,
  782. // import/export
  783. Keywords.kw_import, tok::kw_export);
  784. }
  785. // readTokenWithJavaScriptASI reads the next token and terminates the current
  786. // line if JavaScript Automatic Semicolon Insertion must
  787. // happen between the current token and the next token.
  788. //
  789. // This method is conservative - it cannot cover all edge cases of JavaScript,
  790. // but only aims to correctly handle certain well known cases. It *must not*
  791. // return true in speculative cases.
  792. void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  793. FormatToken *Previous = FormatTok;
  794. readToken();
  795. FormatToken *Next = FormatTok;
  796. bool IsOnSameLine =
  797. CommentsBeforeNextToken.empty()
  798. ? Next->NewlinesBefore == 0
  799. : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  800. if (IsOnSameLine)
  801. return;
  802. bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  803. bool PreviousStartsTemplateExpr =
  804. Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  805. if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
  806. // If the token before the previous one is an '@', the previous token is an
  807. // annotation and can precede another identifier/value.
  808. const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
  809. if (PrePrevious->is(tok::at))
  810. return;
  811. }
  812. if (Next->is(tok::exclaim) && PreviousMustBeValue)
  813. return addUnwrappedLine();
  814. bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  815. bool NextEndsTemplateExpr =
  816. Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  817. if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
  818. (PreviousMustBeValue ||
  819. Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
  820. tok::minusminus)))
  821. return addUnwrappedLine();
  822. if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
  823. isJSDeclOrStmt(Keywords, Next))
  824. return addUnwrappedLine();
  825. }
  826. void UnwrappedLineParser::parseStructuralElement() {
  827. assert(!FormatTok->is(tok::l_brace));
  828. if (Style.Language == FormatStyle::LK_TableGen &&
  829. FormatTok->is(tok::pp_include)) {
  830. nextToken();
  831. if (FormatTok->is(tok::string_literal))
  832. nextToken();
  833. addUnwrappedLine();
  834. return;
  835. }
  836. switch (FormatTok->Tok.getKind()) {
  837. case tok::at:
  838. nextToken();
  839. if (FormatTok->Tok.is(tok::l_brace)) {
  840. nextToken();
  841. parseBracedList();
  842. break;
  843. }
  844. switch (FormatTok->Tok.getObjCKeywordID()) {
  845. case tok::objc_public:
  846. case tok::objc_protected:
  847. case tok::objc_package:
  848. case tok::objc_private:
  849. return parseAccessSpecifier();
  850. case tok::objc_interface:
  851. case tok::objc_implementation:
  852. return parseObjCInterfaceOrImplementation();
  853. case tok::objc_protocol:
  854. return parseObjCProtocol();
  855. case tok::objc_end:
  856. return; // Handled by the caller.
  857. case tok::objc_optional:
  858. case tok::objc_required:
  859. nextToken();
  860. addUnwrappedLine();
  861. return;
  862. case tok::objc_autoreleasepool:
  863. nextToken();
  864. if (FormatTok->Tok.is(tok::l_brace)) {
  865. if (Style.BraceWrapping.AfterObjCDeclaration)
  866. addUnwrappedLine();
  867. parseBlock(/*MustBeDeclaration=*/false);
  868. }
  869. addUnwrappedLine();
  870. return;
  871. case tok::objc_try:
  872. // This branch isn't strictly necessary (the kw_try case below would
  873. // do this too after the tok::at is parsed above). But be explicit.
  874. parseTryCatch();
  875. return;
  876. default:
  877. break;
  878. }
  879. break;
  880. case tok::kw_asm:
  881. nextToken();
  882. if (FormatTok->is(tok::l_brace)) {
  883. FormatTok->Type = TT_InlineASMBrace;
  884. nextToken();
  885. while (FormatTok && FormatTok->isNot(tok::eof)) {
  886. if (FormatTok->is(tok::r_brace)) {
  887. FormatTok->Type = TT_InlineASMBrace;
  888. nextToken();
  889. addUnwrappedLine();
  890. break;
  891. }
  892. FormatTok->Finalized = true;
  893. nextToken();
  894. }
  895. }
  896. break;
  897. case tok::kw_namespace:
  898. parseNamespace();
  899. return;
  900. case tok::kw_inline:
  901. nextToken();
  902. if (FormatTok->Tok.is(tok::kw_namespace)) {
  903. parseNamespace();
  904. return;
  905. }
  906. break;
  907. case tok::kw_public:
  908. case tok::kw_protected:
  909. case tok::kw_private:
  910. if (Style.Language == FormatStyle::LK_Java ||
  911. Style.Language == FormatStyle::LK_JavaScript)
  912. nextToken();
  913. else
  914. parseAccessSpecifier();
  915. return;
  916. case tok::kw_if:
  917. parseIfThenElse();
  918. return;
  919. case tok::kw_for:
  920. case tok::kw_while:
  921. parseForOrWhileLoop();
  922. return;
  923. case tok::kw_do:
  924. parseDoWhile();
  925. return;
  926. case tok::kw_switch:
  927. if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
  928. // 'switch: string' field declaration.
  929. break;
  930. parseSwitch();
  931. return;
  932. case tok::kw_default:
  933. if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
  934. // 'default: string' field declaration.
  935. break;
  936. nextToken();
  937. parseLabel();
  938. return;
  939. case tok::kw_case:
  940. if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
  941. // 'case: string' field declaration.
  942. break;
  943. parseCaseLabel();
  944. return;
  945. case tok::kw_try:
  946. case tok::kw___try:
  947. parseTryCatch();
  948. return;
  949. case tok::kw_extern:
  950. nextToken();
  951. if (FormatTok->Tok.is(tok::string_literal)) {
  952. nextToken();
  953. if (FormatTok->Tok.is(tok::l_brace)) {
  954. if (Style.BraceWrapping.AfterExternBlock) {
  955. addUnwrappedLine();
  956. parseBlock(/*MustBeDeclaration=*/true);
  957. } else {
  958. parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
  959. }
  960. addUnwrappedLine();
  961. return;
  962. }
  963. }
  964. break;
  965. case tok::kw_export:
  966. if (Style.Language == FormatStyle::LK_JavaScript) {
  967. parseJavaScriptEs6ImportExport();
  968. return;
  969. }
  970. break;
  971. case tok::identifier:
  972. if (FormatTok->is(TT_ForEachMacro)) {
  973. parseForOrWhileLoop();
  974. return;
  975. }
  976. if (FormatTok->is(TT_MacroBlockBegin)) {
  977. parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
  978. /*MunchSemi=*/false);
  979. return;
  980. }
  981. if (FormatTok->is(Keywords.kw_import)) {
  982. if (Style.Language == FormatStyle::LK_JavaScript) {
  983. parseJavaScriptEs6ImportExport();
  984. return;
  985. }
  986. if (Style.Language == FormatStyle::LK_Proto) {
  987. nextToken();
  988. if (FormatTok->is(tok::kw_public))
  989. nextToken();
  990. if (!FormatTok->is(tok::string_literal))
  991. return;
  992. nextToken();
  993. if (FormatTok->is(tok::semi))
  994. nextToken();
  995. addUnwrappedLine();
  996. return;
  997. }
  998. }
  999. if (Style.isCpp() &&
  1000. FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
  1001. Keywords.kw_slots, Keywords.kw_qslots)) {
  1002. nextToken();
  1003. if (FormatTok->is(tok::colon)) {
  1004. nextToken();
  1005. addUnwrappedLine();
  1006. return;
  1007. }
  1008. }
  1009. // In all other cases, parse the declaration.
  1010. break;
  1011. default:
  1012. break;
  1013. }
  1014. do {
  1015. const FormatToken *Previous = FormatTok->Previous;
  1016. switch (FormatTok->Tok.getKind()) {
  1017. case tok::at:
  1018. nextToken();
  1019. if (FormatTok->Tok.is(tok::l_brace)) {
  1020. nextToken();
  1021. parseBracedList();
  1022. }
  1023. break;
  1024. case tok::kw_enum:
  1025. // Ignore if this is part of "template <enum ...".
  1026. if (Previous && Previous->is(tok::less)) {
  1027. nextToken();
  1028. break;
  1029. }
  1030. // parseEnum falls through and does not yet add an unwrapped line as an
  1031. // enum definition can start a structural element.
  1032. if (!parseEnum())
  1033. break;
  1034. // This only applies for C++.
  1035. if (!Style.isCpp()) {
  1036. addUnwrappedLine();
  1037. return;
  1038. }
  1039. break;
  1040. case tok::kw_typedef:
  1041. nextToken();
  1042. if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
  1043. Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
  1044. parseEnum();
  1045. break;
  1046. case tok::kw_struct:
  1047. case tok::kw_union:
  1048. case tok::kw_class:
  1049. // parseRecord falls through and does not yet add an unwrapped line as a
  1050. // record declaration or definition can start a structural element.
  1051. parseRecord();
  1052. // This does not apply for Java and JavaScript.
  1053. if (Style.Language == FormatStyle::LK_Java ||
  1054. Style.Language == FormatStyle::LK_JavaScript) {
  1055. if (FormatTok->is(tok::semi))
  1056. nextToken();
  1057. addUnwrappedLine();
  1058. return;
  1059. }
  1060. break;
  1061. case tok::period:
  1062. nextToken();
  1063. // In Java, classes have an implicit static member "class".
  1064. if (Style.Language == FormatStyle::LK_Java && FormatTok &&
  1065. FormatTok->is(tok::kw_class))
  1066. nextToken();
  1067. if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
  1068. FormatTok->Tok.getIdentifierInfo())
  1069. // JavaScript only has pseudo keywords, all keywords are allowed to
  1070. // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
  1071. nextToken();
  1072. break;
  1073. case tok::semi:
  1074. nextToken();
  1075. addUnwrappedLine();
  1076. return;
  1077. case tok::r_brace:
  1078. addUnwrappedLine();
  1079. return;
  1080. case tok::l_paren:
  1081. parseParens();
  1082. break;
  1083. case tok::kw_operator:
  1084. nextToken();
  1085. if (FormatTok->isBinaryOperator())
  1086. nextToken();
  1087. break;
  1088. case tok::caret:
  1089. nextToken();
  1090. if (FormatTok->Tok.isAnyIdentifier() ||
  1091. FormatTok->isSimpleTypeSpecifier())
  1092. nextToken();
  1093. if (FormatTok->is(tok::l_paren))
  1094. parseParens();
  1095. if (FormatTok->is(tok::l_brace))
  1096. parseChildBlock();
  1097. break;
  1098. case tok::l_brace:
  1099. if (!tryToParseBracedList()) {
  1100. // A block outside of parentheses must be the last part of a
  1101. // structural element.
  1102. // FIXME: Figure out cases where this is not true, and add projections
  1103. // for them (the one we know is missing are lambdas).
  1104. if (Style.BraceWrapping.AfterFunction)
  1105. addUnwrappedLine();
  1106. FormatTok->Type = TT_FunctionLBrace;
  1107. parseBlock(/*MustBeDeclaration=*/false);
  1108. addUnwrappedLine();
  1109. return;
  1110. }
  1111. // Otherwise this was a braced init list, and the structural
  1112. // element continues.
  1113. break;
  1114. case tok::kw_try:
  1115. // We arrive here when parsing function-try blocks.
  1116. parseTryCatch();
  1117. return;
  1118. case tok::identifier: {
  1119. if (FormatTok->is(TT_MacroBlockEnd)) {
  1120. addUnwrappedLine();
  1121. return;
  1122. }
  1123. // Function declarations (as opposed to function expressions) are parsed
  1124. // on their own unwrapped line by continuing this loop. Function
  1125. // expressions (functions that are not on their own line) must not create
  1126. // a new unwrapped line, so they are special cased below.
  1127. size_t TokenCount = Line->Tokens.size();
  1128. if (Style.Language == FormatStyle::LK_JavaScript &&
  1129. FormatTok->is(Keywords.kw_function) &&
  1130. (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
  1131. Keywords.kw_async)))) {
  1132. tryToParseJSFunction();
  1133. break;
  1134. }
  1135. if ((Style.Language == FormatStyle::LK_JavaScript ||
  1136. Style.Language == FormatStyle::LK_Java) &&
  1137. FormatTok->is(Keywords.kw_interface)) {
  1138. if (Style.Language == FormatStyle::LK_JavaScript) {
  1139. // In JavaScript/TypeScript, "interface" can be used as a standalone
  1140. // identifier, e.g. in `var interface = 1;`. If "interface" is
  1141. // followed by another identifier, it is very like to be an actual
  1142. // interface declaration.
  1143. unsigned StoredPosition = Tokens->getPosition();
  1144. FormatToken *Next = Tokens->getNextToken();
  1145. FormatTok = Tokens->setPosition(StoredPosition);
  1146. if (Next && !mustBeJSIdent(Keywords, Next)) {
  1147. nextToken();
  1148. break;
  1149. }
  1150. }
  1151. parseRecord();
  1152. addUnwrappedLine();
  1153. return;
  1154. }
  1155. // See if the following token should start a new unwrapped line.
  1156. StringRef Text = FormatTok->TokenText;
  1157. nextToken();
  1158. if (Line->Tokens.size() == 1 &&
  1159. // JS doesn't have macros, and within classes colons indicate fields,
  1160. // not labels.
  1161. Style.Language != FormatStyle::LK_JavaScript) {
  1162. if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
  1163. Line->Tokens.begin()->Tok->MustBreakBefore = true;
  1164. parseLabel();
  1165. return;
  1166. }
  1167. // Recognize function-like macro usages without trailing semicolon as
  1168. // well as free-standing macros like Q_OBJECT.
  1169. bool FunctionLike = FormatTok->is(tok::l_paren);
  1170. if (FunctionLike)
  1171. parseParens();
  1172. bool FollowedByNewline =
  1173. CommentsBeforeNextToken.empty()
  1174. ? FormatTok->NewlinesBefore > 0
  1175. : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
  1176. if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
  1177. tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
  1178. addUnwrappedLine();
  1179. return;
  1180. }
  1181. }
  1182. break;
  1183. }
  1184. case tok::equal:
  1185. // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
  1186. // TT_JsFatArrow. The always start an expression or a child block if
  1187. // followed by a curly.
  1188. if (FormatTok->is(TT_JsFatArrow)) {
  1189. nextToken();
  1190. if (FormatTok->is(tok::l_brace))
  1191. parseChildBlock();
  1192. break;
  1193. }
  1194. nextToken();
  1195. if (FormatTok->Tok.is(tok::l_brace)) {
  1196. nextToken();
  1197. parseBracedList();
  1198. } else if (Style.Language == FormatStyle::LK_Proto &&
  1199. FormatTok->Tok.is(tok::less)) {
  1200. nextToken();
  1201. parseBracedList(/*ContinueOnSemicolons=*/false,
  1202. /*ClosingBraceKind=*/tok::greater);
  1203. }
  1204. break;
  1205. case tok::l_square:
  1206. parseSquare();
  1207. break;
  1208. case tok::kw_new:
  1209. parseNew();
  1210. break;
  1211. default:
  1212. nextToken();
  1213. break;
  1214. }
  1215. } while (!eof());
  1216. }
  1217. bool UnwrappedLineParser::tryToParseLambda() {
  1218. if (!Style.isCpp()) {
  1219. nextToken();
  1220. return false;
  1221. }
  1222. assert(FormatTok->is(tok::l_square));
  1223. FormatToken &LSquare = *FormatTok;
  1224. if (!tryToParseLambdaIntroducer())
  1225. return false;
  1226. while (FormatTok->isNot(tok::l_brace)) {
  1227. if (FormatTok->isSimpleTypeSpecifier()) {
  1228. nextToken();
  1229. continue;
  1230. }
  1231. switch (FormatTok->Tok.getKind()) {
  1232. case tok::l_brace:
  1233. break;
  1234. case tok::l_paren:
  1235. parseParens();
  1236. break;
  1237. case tok::amp:
  1238. case tok::star:
  1239. case tok::kw_const:
  1240. case tok::comma:
  1241. case tok::less:
  1242. case tok::greater:
  1243. case tok::identifier:
  1244. case tok::numeric_constant:
  1245. case tok::coloncolon:
  1246. case tok::kw_mutable:
  1247. nextToken();
  1248. break;
  1249. case tok::arrow:
  1250. FormatTok->Type = TT_LambdaArrow;
  1251. nextToken();
  1252. break;
  1253. default:
  1254. return true;
  1255. }
  1256. }
  1257. LSquare.Type = TT_LambdaLSquare;
  1258. parseChildBlock();
  1259. return true;
  1260. }
  1261. bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  1262. const FormatToken *Previous = FormatTok->Previous;
  1263. if (Previous &&
  1264. (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
  1265. tok::kw_delete) ||
  1266. FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
  1267. Previous->isSimpleTypeSpecifier())) {
  1268. nextToken();
  1269. return false;
  1270. }
  1271. nextToken();
  1272. parseSquare(/*LambdaIntroducer=*/true);
  1273. return true;
  1274. }
  1275. void UnwrappedLineParser::tryToParseJSFunction() {
  1276. assert(FormatTok->is(Keywords.kw_function) ||
  1277. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
  1278. if (FormatTok->is(Keywords.kw_async))
  1279. nextToken();
  1280. // Consume "function".
  1281. nextToken();
  1282. // Consume * (generator function). Treat it like C++'s overloaded operators.
  1283. if (FormatTok->is(tok::star)) {
  1284. FormatTok->Type = TT_OverloadedOperator;
  1285. nextToken();
  1286. }
  1287. // Consume function name.
  1288. if (FormatTok->is(tok::identifier))
  1289. nextToken();
  1290. if (FormatTok->isNot(tok::l_paren))
  1291. return;
  1292. // Parse formal parameter list.
  1293. parseParens();
  1294. if (FormatTok->is(tok::colon)) {
  1295. // Parse a type definition.
  1296. nextToken();
  1297. // Eat the type declaration. For braced inline object types, balance braces,
  1298. // otherwise just parse until finding an l_brace for the function body.
  1299. if (FormatTok->is(tok::l_brace))
  1300. tryToParseBracedList();
  1301. else
  1302. while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
  1303. nextToken();
  1304. }
  1305. if (FormatTok->is(tok::semi))
  1306. return;
  1307. parseChildBlock();
  1308. }
  1309. bool UnwrappedLineParser::tryToParseBracedList() {
  1310. if (FormatTok->BlockKind == BK_Unknown)
  1311. calculateBraceTypes();
  1312. assert(FormatTok->BlockKind != BK_Unknown);
  1313. if (FormatTok->BlockKind == BK_Block)
  1314. return false;
  1315. nextToken();
  1316. parseBracedList();
  1317. return true;
  1318. }
  1319. bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
  1320. tok::TokenKind ClosingBraceKind) {
  1321. bool HasError = false;
  1322. // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  1323. // replace this by using parseAssigmentExpression() inside.
  1324. do {
  1325. if (Style.Language == FormatStyle::LK_JavaScript) {
  1326. if (FormatTok->is(Keywords.kw_function) ||
  1327. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
  1328. tryToParseJSFunction();
  1329. continue;
  1330. }
  1331. if (FormatTok->is(TT_JsFatArrow)) {
  1332. nextToken();
  1333. // Fat arrows can be followed by simple expressions or by child blocks
  1334. // in curly braces.
  1335. if (FormatTok->is(tok::l_brace)) {
  1336. parseChildBlock();
  1337. continue;
  1338. }
  1339. }
  1340. if (FormatTok->is(tok::l_brace)) {
  1341. // Could be a method inside of a braced list `{a() { return 1; }}`.
  1342. if (tryToParseBracedList())
  1343. continue;
  1344. parseChildBlock();
  1345. }
  1346. }
  1347. if (FormatTok->Tok.getKind() == ClosingBraceKind) {
  1348. nextToken();
  1349. return !HasError;
  1350. }
  1351. switch (FormatTok->Tok.getKind()) {
  1352. case tok::caret:
  1353. nextToken();
  1354. if (FormatTok->is(tok::l_brace)) {
  1355. parseChildBlock();
  1356. }
  1357. break;
  1358. case tok::l_square:
  1359. tryToParseLambda();
  1360. break;
  1361. case tok::l_paren:
  1362. parseParens();
  1363. // JavaScript can just have free standing methods and getters/setters in
  1364. // object literals. Detect them by a "{" following ")".
  1365. if (Style.Language == FormatStyle::LK_JavaScript) {
  1366. if (FormatTok->is(tok::l_brace))
  1367. parseChildBlock();
  1368. break;
  1369. }
  1370. break;
  1371. case tok::l_brace:
  1372. // Assume there are no blocks inside a braced init list apart
  1373. // from the ones we explicitly parse out (like lambdas).
  1374. FormatTok->BlockKind = BK_BracedInit;
  1375. nextToken();
  1376. parseBracedList();
  1377. break;
  1378. case tok::less:
  1379. if (Style.Language == FormatStyle::LK_Proto) {
  1380. nextToken();
  1381. parseBracedList(/*ContinueOnSemicolons=*/false,
  1382. /*ClosingBraceKind=*/tok::greater);
  1383. } else {
  1384. nextToken();
  1385. }
  1386. break;
  1387. case tok::semi:
  1388. // JavaScript (or more precisely TypeScript) can have semicolons in braced
  1389. // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
  1390. // used for error recovery if we have otherwise determined that this is
  1391. // a braced list.
  1392. if (Style.Language == FormatStyle::LK_JavaScript) {
  1393. nextToken();
  1394. break;
  1395. }
  1396. HasError = true;
  1397. if (!ContinueOnSemicolons)
  1398. return !HasError;
  1399. nextToken();
  1400. break;
  1401. case tok::comma:
  1402. nextToken();
  1403. break;
  1404. default:
  1405. nextToken();
  1406. break;
  1407. }
  1408. } while (!eof());
  1409. return false;
  1410. }
  1411. void UnwrappedLineParser::parseParens() {
  1412. assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  1413. nextToken();
  1414. do {
  1415. switch (FormatTok->Tok.getKind()) {
  1416. case tok::l_paren:
  1417. parseParens();
  1418. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
  1419. parseChildBlock();
  1420. break;
  1421. case tok::r_paren:
  1422. nextToken();
  1423. return;
  1424. case tok::r_brace:
  1425. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  1426. return;
  1427. case tok::l_square:
  1428. tryToParseLambda();
  1429. break;
  1430. case tok::l_brace:
  1431. if (!tryToParseBracedList())
  1432. parseChildBlock();
  1433. break;
  1434. case tok::at:
  1435. nextToken();
  1436. if (FormatTok->Tok.is(tok::l_brace)) {
  1437. nextToken();
  1438. parseBracedList();
  1439. }
  1440. break;
  1441. case tok::kw_class:
  1442. if (Style.Language == FormatStyle::LK_JavaScript)
  1443. parseRecord(/*ParseAsExpr=*/true);
  1444. else
  1445. nextToken();
  1446. break;
  1447. case tok::identifier:
  1448. if (Style.Language == FormatStyle::LK_JavaScript &&
  1449. (FormatTok->is(Keywords.kw_function) ||
  1450. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
  1451. tryToParseJSFunction();
  1452. else
  1453. nextToken();
  1454. break;
  1455. default:
  1456. nextToken();
  1457. break;
  1458. }
  1459. } while (!eof());
  1460. }
  1461. void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  1462. if (!LambdaIntroducer) {
  1463. assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
  1464. if (tryToParseLambda())
  1465. return;
  1466. }
  1467. do {
  1468. switch (FormatTok->Tok.getKind()) {
  1469. case tok::l_paren:
  1470. parseParens();
  1471. break;
  1472. case tok::r_square:
  1473. nextToken();
  1474. return;
  1475. case tok::r_brace:
  1476. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  1477. return;
  1478. case tok::l_square:
  1479. parseSquare();
  1480. break;
  1481. case tok::l_brace: {
  1482. if (!tryToParseBracedList())
  1483. parseChildBlock();
  1484. break;
  1485. }
  1486. case tok::at:
  1487. nextToken();
  1488. if (FormatTok->Tok.is(tok::l_brace)) {
  1489. nextToken();
  1490. parseBracedList();
  1491. }
  1492. break;
  1493. default:
  1494. nextToken();
  1495. break;
  1496. }
  1497. } while (!eof());
  1498. }
  1499. void UnwrappedLineParser::parseIfThenElse() {
  1500. assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  1501. nextToken();
  1502. if (FormatTok->Tok.is(tok::kw_constexpr))
  1503. nextToken();
  1504. if (FormatTok->Tok.is(tok::l_paren))
  1505. parseParens();
  1506. bool NeedsUnwrappedLine = false;
  1507. if (FormatTok->Tok.is(tok::l_brace)) {
  1508. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1509. parseBlock(/*MustBeDeclaration=*/false);
  1510. if (Style.BraceWrapping.BeforeElse)
  1511. addUnwrappedLine();
  1512. else
  1513. NeedsUnwrappedLine = true;
  1514. } else {
  1515. addUnwrappedLine();
  1516. ++Line->Level;
  1517. parseStructuralElement();
  1518. --Line->Level;
  1519. }
  1520. if (FormatTok->Tok.is(tok::kw_else)) {
  1521. nextToken();
  1522. if (FormatTok->Tok.is(tok::l_brace)) {
  1523. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1524. parseBlock(/*MustBeDeclaration=*/false);
  1525. addUnwrappedLine();
  1526. } else if (FormatTok->Tok.is(tok::kw_if)) {
  1527. parseIfThenElse();
  1528. } else {
  1529. addUnwrappedLine();
  1530. ++Line->Level;
  1531. parseStructuralElement();
  1532. if (FormatTok->is(tok::eof))
  1533. addUnwrappedLine();
  1534. --Line->Level;
  1535. }
  1536. } else if (NeedsUnwrappedLine) {
  1537. addUnwrappedLine();
  1538. }
  1539. }
  1540. void UnwrappedLineParser::parseTryCatch() {
  1541. assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  1542. nextToken();
  1543. bool NeedsUnwrappedLine = false;
  1544. if (FormatTok->is(tok::colon)) {
  1545. // We are in a function try block, what comes is an initializer list.
  1546. nextToken();
  1547. while (FormatTok->is(tok::identifier)) {
  1548. nextToken();
  1549. if (FormatTok->is(tok::l_paren))
  1550. parseParens();
  1551. if (FormatTok->is(tok::comma))
  1552. nextToken();
  1553. }
  1554. }
  1555. // Parse try with resource.
  1556. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
  1557. parseParens();
  1558. }
  1559. if (FormatTok->is(tok::l_brace)) {
  1560. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1561. parseBlock(/*MustBeDeclaration=*/false);
  1562. if (Style.BraceWrapping.BeforeCatch) {
  1563. addUnwrappedLine();
  1564. } else {
  1565. NeedsUnwrappedLine = true;
  1566. }
  1567. } else if (!FormatTok->is(tok::kw_catch)) {
  1568. // The C++ standard requires a compound-statement after a try.
  1569. // If there's none, we try to assume there's a structuralElement
  1570. // and try to continue.
  1571. addUnwrappedLine();
  1572. ++Line->Level;
  1573. parseStructuralElement();
  1574. --Line->Level;
  1575. }
  1576. while (1) {
  1577. if (FormatTok->is(tok::at))
  1578. nextToken();
  1579. if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
  1580. tok::kw___finally) ||
  1581. ((Style.Language == FormatStyle::LK_Java ||
  1582. Style.Language == FormatStyle::LK_JavaScript) &&
  1583. FormatTok->is(Keywords.kw_finally)) ||
  1584. (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
  1585. FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
  1586. break;
  1587. nextToken();
  1588. while (FormatTok->isNot(tok::l_brace)) {
  1589. if (FormatTok->is(tok::l_paren)) {
  1590. parseParens();
  1591. continue;
  1592. }
  1593. if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
  1594. return;
  1595. nextToken();
  1596. }
  1597. NeedsUnwrappedLine = false;
  1598. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1599. parseBlock(/*MustBeDeclaration=*/false);
  1600. if (Style.BraceWrapping.BeforeCatch)
  1601. addUnwrappedLine();
  1602. else
  1603. NeedsUnwrappedLine = true;
  1604. }
  1605. if (NeedsUnwrappedLine)
  1606. addUnwrappedLine();
  1607. }
  1608. void UnwrappedLineParser::parseNamespace() {
  1609. assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
  1610. const FormatToken &InitialToken = *FormatTok;
  1611. nextToken();
  1612. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
  1613. nextToken();
  1614. if (FormatTok->Tok.is(tok::l_brace)) {
  1615. if (ShouldBreakBeforeBrace(Style, InitialToken))
  1616. addUnwrappedLine();
  1617. bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
  1618. (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
  1619. DeclarationScopeStack.size() > 1);
  1620. parseBlock(/*MustBeDeclaration=*/true, AddLevel);
  1621. // Munch the semicolon after a namespace. This is more common than one would
  1622. // think. Puttin the semicolon into its own line is very ugly.
  1623. if (FormatTok->Tok.is(tok::semi))
  1624. nextToken();
  1625. addUnwrappedLine();
  1626. }
  1627. // FIXME: Add error handling.
  1628. }
  1629. void UnwrappedLineParser::parseNew() {
  1630. assert(FormatTok->is(tok::kw_new) && "'new' expected");
  1631. nextToken();
  1632. if (Style.Language != FormatStyle::LK_Java)
  1633. return;
  1634. // In Java, we can parse everything up to the parens, which aren't optional.
  1635. do {
  1636. // There should not be a ;, { or } before the new's open paren.
  1637. if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
  1638. return;
  1639. // Consume the parens.
  1640. if (FormatTok->is(tok::l_paren)) {
  1641. parseParens();
  1642. // If there is a class body of an anonymous class, consume that as child.
  1643. if (FormatTok->is(tok::l_brace))
  1644. parseChildBlock();
  1645. return;
  1646. }
  1647. nextToken();
  1648. } while (!eof());
  1649. }
  1650. void UnwrappedLineParser::parseForOrWhileLoop() {
  1651. assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
  1652. "'for', 'while' or foreach macro expected");
  1653. nextToken();
  1654. // JS' for await ( ...
  1655. if (Style.Language == FormatStyle::LK_JavaScript &&
  1656. FormatTok->is(Keywords.kw_await))
  1657. nextToken();
  1658. if (FormatTok->Tok.is(tok::l_paren))
  1659. parseParens();
  1660. if (FormatTok->Tok.is(tok::l_brace)) {
  1661. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1662. parseBlock(/*MustBeDeclaration=*/false);
  1663. addUnwrappedLine();
  1664. } else {
  1665. addUnwrappedLine();
  1666. ++Line->Level;
  1667. parseStructuralElement();
  1668. --Line->Level;
  1669. }
  1670. }
  1671. void UnwrappedLineParser::parseDoWhile() {
  1672. assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
  1673. nextToken();
  1674. if (FormatTok->Tok.is(tok::l_brace)) {
  1675. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1676. parseBlock(/*MustBeDeclaration=*/false);
  1677. if (Style.BraceWrapping.IndentBraces)
  1678. addUnwrappedLine();
  1679. } else {
  1680. addUnwrappedLine();
  1681. ++Line->Level;
  1682. parseStructuralElement();
  1683. --Line->Level;
  1684. }
  1685. // FIXME: Add error handling.
  1686. if (!FormatTok->Tok.is(tok::kw_while)) {
  1687. addUnwrappedLine();
  1688. return;
  1689. }
  1690. nextToken();
  1691. parseStructuralElement();
  1692. }
  1693. void UnwrappedLineParser::parseLabel() {
  1694. nextToken();
  1695. unsigned OldLineLevel = Line->Level;
  1696. if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
  1697. --Line->Level;
  1698. if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
  1699. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1700. parseBlock(/*MustBeDeclaration=*/false);
  1701. if (FormatTok->Tok.is(tok::kw_break)) {
  1702. if (Style.BraceWrapping.AfterControlStatement)
  1703. addUnwrappedLine();
  1704. parseStructuralElement();
  1705. }
  1706. addUnwrappedLine();
  1707. } else {
  1708. if (FormatTok->is(tok::semi))
  1709. nextToken();
  1710. addUnwrappedLine();
  1711. }
  1712. Line->Level = OldLineLevel;
  1713. if (FormatTok->isNot(tok::l_brace)) {
  1714. parseStructuralElement();
  1715. addUnwrappedLine();
  1716. }
  1717. }
  1718. void UnwrappedLineParser::parseCaseLabel() {
  1719. assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
  1720. // FIXME: fix handling of complex expressions here.
  1721. do {
  1722. nextToken();
  1723. } while (!eof() && !FormatTok->Tok.is(tok::colon));
  1724. parseLabel();
  1725. }
  1726. void UnwrappedLineParser::parseSwitch() {
  1727. assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
  1728. nextToken();
  1729. if (FormatTok->Tok.is(tok::l_paren))
  1730. parseParens();
  1731. if (FormatTok->Tok.is(tok::l_brace)) {
  1732. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1733. parseBlock(/*MustBeDeclaration=*/false);
  1734. addUnwrappedLine();
  1735. } else {
  1736. addUnwrappedLine();
  1737. ++Line->Level;
  1738. parseStructuralElement();
  1739. --Line->Level;
  1740. }
  1741. }
  1742. void UnwrappedLineParser::parseAccessSpecifier() {
  1743. nextToken();
  1744. // Understand Qt's slots.
  1745. if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
  1746. nextToken();
  1747. // Otherwise, we don't know what it is, and we'd better keep the next token.
  1748. if (FormatTok->Tok.is(tok::colon))
  1749. nextToken();
  1750. addUnwrappedLine();
  1751. }
  1752. bool UnwrappedLineParser::parseEnum() {
  1753. // Won't be 'enum' for NS_ENUMs.
  1754. if (FormatTok->Tok.is(tok::kw_enum))
  1755. nextToken();
  1756. // In TypeScript, "enum" can also be used as property name, e.g. in interface
  1757. // declarations. An "enum" keyword followed by a colon would be a syntax
  1758. // error and thus assume it is just an identifier.
  1759. if (Style.Language == FormatStyle::LK_JavaScript &&
  1760. FormatTok->isOneOf(tok::colon, tok::question))
  1761. return false;
  1762. // Eat up enum class ...
  1763. if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
  1764. nextToken();
  1765. while (FormatTok->Tok.getIdentifierInfo() ||
  1766. FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
  1767. tok::greater, tok::comma, tok::question)) {
  1768. nextToken();
  1769. // We can have macros or attributes in between 'enum' and the enum name.
  1770. if (FormatTok->is(tok::l_paren))
  1771. parseParens();
  1772. if (FormatTok->is(tok::identifier)) {
  1773. nextToken();
  1774. // If there are two identifiers in a row, this is likely an elaborate
  1775. // return type. In Java, this can be "implements", etc.
  1776. if (Style.isCpp() && FormatTok->is(tok::identifier))
  1777. return false;
  1778. }
  1779. }
  1780. // Just a declaration or something is wrong.
  1781. if (FormatTok->isNot(tok::l_brace))
  1782. return true;
  1783. FormatTok->BlockKind = BK_Block;
  1784. if (Style.Language == FormatStyle::LK_Java) {
  1785. // Java enums are different.
  1786. parseJavaEnumBody();
  1787. return true;
  1788. }
  1789. if (Style.Language == FormatStyle::LK_Proto) {
  1790. parseBlock(/*MustBeDeclaration=*/true);
  1791. return true;
  1792. }
  1793. // Parse enum body.
  1794. nextToken();
  1795. bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
  1796. if (HasError) {
  1797. if (FormatTok->is(tok::semi))
  1798. nextToken();
  1799. addUnwrappedLine();
  1800. }
  1801. return true;
  1802. // There is no addUnwrappedLine() here so that we fall through to parsing a
  1803. // structural element afterwards. Thus, in "enum A {} n, m;",
  1804. // "} n, m;" will end up in one unwrapped line.
  1805. }
  1806. void UnwrappedLineParser::parseJavaEnumBody() {
  1807. // Determine whether the enum is simple, i.e. does not have a semicolon or
  1808. // constants with class bodies. Simple enums can be formatted like braced
  1809. // lists, contracted to a single line, etc.
  1810. unsigned StoredPosition = Tokens->getPosition();
  1811. bool IsSimple = true;
  1812. FormatToken *Tok = Tokens->getNextToken();
  1813. while (Tok) {
  1814. if (Tok->is(tok::r_brace))
  1815. break;
  1816. if (Tok->isOneOf(tok::l_brace, tok::semi)) {
  1817. IsSimple = false;
  1818. break;
  1819. }
  1820. // FIXME: This will also mark enums with braces in the arguments to enum
  1821. // constants as "not simple". This is probably fine in practice, though.
  1822. Tok = Tokens->getNextToken();
  1823. }
  1824. FormatTok = Tokens->setPosition(StoredPosition);
  1825. if (IsSimple) {
  1826. nextToken();
  1827. parseBracedList();
  1828. addUnwrappedLine();
  1829. return;
  1830. }
  1831. // Parse the body of a more complex enum.
  1832. // First add a line for everything up to the "{".
  1833. nextToken();
  1834. addUnwrappedLine();
  1835. ++Line->Level;
  1836. // Parse the enum constants.
  1837. while (FormatTok) {
  1838. if (FormatTok->is(tok::l_brace)) {
  1839. // Parse the constant's class body.
  1840. parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
  1841. /*MunchSemi=*/false);
  1842. } else if (FormatTok->is(tok::l_paren)) {
  1843. parseParens();
  1844. } else if (FormatTok->is(tok::comma)) {
  1845. nextToken();
  1846. addUnwrappedLine();
  1847. } else if (FormatTok->is(tok::semi)) {
  1848. nextToken();
  1849. addUnwrappedLine();
  1850. break;
  1851. } else if (FormatTok->is(tok::r_brace)) {
  1852. addUnwrappedLine();
  1853. break;
  1854. } else {
  1855. nextToken();
  1856. }
  1857. }
  1858. // Parse the class body after the enum's ";" if any.
  1859. parseLevel(/*HasOpeningBrace=*/true);
  1860. nextToken();
  1861. --Line->Level;
  1862. addUnwrappedLine();
  1863. }
  1864. void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  1865. const FormatToken &InitialToken = *FormatTok;
  1866. nextToken();
  1867. // The actual identifier can be a nested name specifier, and in macros
  1868. // it is often token-pasted.
  1869. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
  1870. tok::kw___attribute, tok::kw___declspec,
  1871. tok::kw_alignas) ||
  1872. ((Style.Language == FormatStyle::LK_Java ||
  1873. Style.Language == FormatStyle::LK_JavaScript) &&
  1874. FormatTok->isOneOf(tok::period, tok::comma))) {
  1875. if (Style.Language == FormatStyle::LK_JavaScript &&
  1876. FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
  1877. // JavaScript/TypeScript supports inline object types in
  1878. // extends/implements positions:
  1879. // class Foo implements {bar: number} { }
  1880. nextToken();
  1881. if (FormatTok->is(tok::l_brace)) {
  1882. tryToParseBracedList();
  1883. continue;
  1884. }
  1885. }
  1886. bool IsNonMacroIdentifier =
  1887. FormatTok->is(tok::identifier) &&
  1888. FormatTok->TokenText != FormatTok->TokenText.upper();
  1889. nextToken();
  1890. // We can have macros or attributes in between 'class' and the class name.
  1891. if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
  1892. parseParens();
  1893. }
  1894. // Note that parsing away template declarations here leads to incorrectly
  1895. // accepting function declarations as record declarations.
  1896. // In general, we cannot solve this problem. Consider:
  1897. // class A<int> B() {}
  1898. // which can be a function definition or a class definition when B() is a
  1899. // macro. If we find enough real-world cases where this is a problem, we
  1900. // can parse for the 'template' keyword in the beginning of the statement,
  1901. // and thus rule out the record production in case there is no template
  1902. // (this would still leave us with an ambiguity between template function
  1903. // and class declarations).
  1904. if (FormatTok->isOneOf(tok::colon, tok::less)) {
  1905. while (!eof()) {
  1906. if (FormatTok->is(tok::l_brace)) {
  1907. calculateBraceTypes(/*ExpectClassBody=*/true);
  1908. if (!tryToParseBracedList())
  1909. break;
  1910. }
  1911. if (FormatTok->Tok.is(tok::semi))
  1912. return;
  1913. nextToken();
  1914. }
  1915. }
  1916. if (FormatTok->Tok.is(tok::l_brace)) {
  1917. if (ParseAsExpr) {
  1918. parseChildBlock();
  1919. } else {
  1920. if (ShouldBreakBeforeBrace(Style, InitialToken))
  1921. addUnwrappedLine();
  1922. parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
  1923. /*MunchSemi=*/false);
  1924. }
  1925. }
  1926. // There is no addUnwrappedLine() here so that we fall through to parsing a
  1927. // structural element afterwards. Thus, in "class A {} n, m;",
  1928. // "} n, m;" will end up in one unwrapped line.
  1929. }
  1930. void UnwrappedLineParser::parseObjCProtocolList() {
  1931. assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
  1932. do
  1933. nextToken();
  1934. while (!eof() && FormatTok->Tok.isNot(tok::greater));
  1935. nextToken(); // Skip '>'.
  1936. }
  1937. void UnwrappedLineParser::parseObjCUntilAtEnd() {
  1938. do {
  1939. if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
  1940. nextToken();
  1941. addUnwrappedLine();
  1942. break;
  1943. }
  1944. if (FormatTok->is(tok::l_brace)) {
  1945. parseBlock(/*MustBeDeclaration=*/false);
  1946. // In ObjC interfaces, nothing should be following the "}".
  1947. addUnwrappedLine();
  1948. } else if (FormatTok->is(tok::r_brace)) {
  1949. // Ignore stray "}". parseStructuralElement doesn't consume them.
  1950. nextToken();
  1951. addUnwrappedLine();
  1952. } else {
  1953. parseStructuralElement();
  1954. }
  1955. } while (!eof());
  1956. }
  1957. void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  1958. nextToken();
  1959. nextToken(); // interface name
  1960. // @interface can be followed by either a base class, or a category.
  1961. if (FormatTok->Tok.is(tok::colon)) {
  1962. nextToken();
  1963. nextToken(); // base class name
  1964. } else if (FormatTok->Tok.is(tok::l_paren))
  1965. // Skip category, if present.
  1966. parseParens();
  1967. if (FormatTok->Tok.is(tok::less))
  1968. parseObjCProtocolList();
  1969. if (FormatTok->Tok.is(tok::l_brace)) {
  1970. if (Style.BraceWrapping.AfterObjCDeclaration)
  1971. addUnwrappedLine();
  1972. parseBlock(/*MustBeDeclaration=*/true);
  1973. }
  1974. // With instance variables, this puts '}' on its own line. Without instance
  1975. // variables, this ends the @interface line.
  1976. addUnwrappedLine();
  1977. parseObjCUntilAtEnd();
  1978. }
  1979. void UnwrappedLineParser::parseObjCProtocol() {
  1980. nextToken();
  1981. nextToken(); // protocol name
  1982. if (FormatTok->Tok.is(tok::less))
  1983. parseObjCProtocolList();
  1984. // Check for protocol declaration.
  1985. if (FormatTok->Tok.is(tok::semi)) {
  1986. nextToken();
  1987. return addUnwrappedLine();
  1988. }
  1989. addUnwrappedLine();
  1990. parseObjCUntilAtEnd();
  1991. }
  1992. void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  1993. bool IsImport = FormatTok->is(Keywords.kw_import);
  1994. assert(IsImport || FormatTok->is(tok::kw_export));
  1995. nextToken();
  1996. // Consume the "default" in "export default class/function".
  1997. if (FormatTok->is(tok::kw_default))
  1998. nextToken();
  1999. // Consume "async function", "function" and "default function", so that these
  2000. // get parsed as free-standing JS functions, i.e. do not require a trailing
  2001. // semicolon.
  2002. if (FormatTok->is(Keywords.kw_async))
  2003. nextToken();
  2004. if (FormatTok->is(Keywords.kw_function)) {
  2005. nextToken();
  2006. return;
  2007. }
  2008. // For imports, `export *`, `export {...}`, consume the rest of the line up
  2009. // to the terminating `;`. For everything else, just return and continue
  2010. // parsing the structural element, i.e. the declaration or expression for
  2011. // `export default`.
  2012. if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
  2013. !FormatTok->isStringLiteral())
  2014. return;
  2015. while (!eof()) {
  2016. if (FormatTok->is(tok::semi))
  2017. return;
  2018. if (Line->Tokens.size() == 0) {
  2019. // Common issue: Automatic Semicolon Insertion wrapped the line, so the
  2020. // import statement should terminate.
  2021. return;
  2022. }
  2023. if (FormatTok->is(tok::l_brace)) {
  2024. FormatTok->BlockKind = BK_Block;
  2025. nextToken();
  2026. parseBracedList();
  2027. } else {
  2028. nextToken();
  2029. }
  2030. }
  2031. }
  2032. LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
  2033. StringRef Prefix = "") {
  2034. llvm::dbgs() << Prefix << "Line(" << Line.Level
  2035. << ", FSC=" << Line.FirstStartColumn << ")"
  2036. << (Line.InPPDirective ? " MACRO" : "") << ": ";
  2037. for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  2038. E = Line.Tokens.end();
  2039. I != E; ++I) {
  2040. llvm::dbgs() << I->Tok->Tok.getName() << "["
  2041. << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
  2042. << "] ";
  2043. }
  2044. for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  2045. E = Line.Tokens.end();
  2046. I != E; ++I) {
  2047. const UnwrappedLineNode &Node = *I;
  2048. for (SmallVectorImpl<UnwrappedLine>::const_iterator
  2049. I = Node.Children.begin(),
  2050. E = Node.Children.end();
  2051. I != E; ++I) {
  2052. printDebugInfo(*I, "\nChild: ");
  2053. }
  2054. }
  2055. llvm::dbgs() << "\n";
  2056. }
  2057. void UnwrappedLineParser::addUnwrappedLine() {
  2058. if (Line->Tokens.empty())
  2059. return;
  2060. DEBUG({
  2061. if (CurrentLines == &Lines)
  2062. printDebugInfo(*Line);
  2063. });
  2064. CurrentLines->push_back(std::move(*Line));
  2065. Line->Tokens.clear();
  2066. Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  2067. Line->FirstStartColumn = 0;
  2068. if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
  2069. CurrentLines->append(
  2070. std::make_move_iterator(PreprocessorDirectives.begin()),
  2071. std::make_move_iterator(PreprocessorDirectives.end()));
  2072. PreprocessorDirectives.clear();
  2073. }
  2074. // Disconnect the current token from the last token on the previous line.
  2075. FormatTok->Previous = nullptr;
  2076. }
  2077. bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
  2078. bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
  2079. return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
  2080. FormatTok.NewlinesBefore > 0;
  2081. }
  2082. // Checks if \p FormatTok is a line comment that continues the line comment
  2083. // section on \p Line.
  2084. static bool continuesLineCommentSection(const FormatToken &FormatTok,
  2085. const UnwrappedLine &Line,
  2086. llvm::Regex &CommentPragmasRegex) {
  2087. if (Line.Tokens.empty())
  2088. return false;
  2089. StringRef IndentContent = FormatTok.TokenText;
  2090. if (FormatTok.TokenText.startswith("//") ||
  2091. FormatTok.TokenText.startswith("/*"))
  2092. IndentContent = FormatTok.TokenText.substr(2);
  2093. if (CommentPragmasRegex.match(IndentContent))
  2094. return false;
  2095. // If Line starts with a line comment, then FormatTok continues the comment
  2096. // section if its original column is greater or equal to the original start
  2097. // column of the line.
  2098. //
  2099. // Define the min column token of a line as follows: if a line ends in '{' or
  2100. // contains a '{' followed by a line comment, then the min column token is
  2101. // that '{'. Otherwise, the min column token of the line is the first token of
  2102. // the line.
  2103. //
  2104. // If Line starts with a token other than a line comment, then FormatTok
  2105. // continues the comment section if its original column is greater than the
  2106. // original start column of the min column token of the line.
  2107. //
  2108. // For example, the second line comment continues the first in these cases:
  2109. //
  2110. // // first line
  2111. // // second line
  2112. //
  2113. // and:
  2114. //
  2115. // // first line
  2116. // // second line
  2117. //
  2118. // and:
  2119. //
  2120. // int i; // first line
  2121. // // second line
  2122. //
  2123. // and:
  2124. //
  2125. // do { // first line
  2126. // // second line
  2127. // int i;
  2128. // } while (true);
  2129. //
  2130. // and:
  2131. //
  2132. // enum {
  2133. // a, // first line
  2134. // // second line
  2135. // b
  2136. // };
  2137. //
  2138. // The second line comment doesn't continue the first in these cases:
  2139. //
  2140. // // first line
  2141. // // second line
  2142. //
  2143. // and:
  2144. //
  2145. // int i; // first line
  2146. // // second line
  2147. //
  2148. // and:
  2149. //
  2150. // do { // first line
  2151. // // second line
  2152. // int i;
  2153. // } while (true);
  2154. //
  2155. // and:
  2156. //
  2157. // enum {
  2158. // a, // first line
  2159. // // second line
  2160. // };
  2161. const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
  2162. // Scan for '{//'. If found, use the column of '{' as a min column for line
  2163. // comment section continuation.
  2164. const FormatToken *PreviousToken = nullptr;
  2165. for (const UnwrappedLineNode &Node : Line.Tokens) {
  2166. if (PreviousToken && PreviousToken->is(tok::l_brace) &&
  2167. isLineComment(*Node.Tok)) {
  2168. MinColumnToken = PreviousToken;
  2169. break;
  2170. }
  2171. PreviousToken = Node.Tok;
  2172. // Grab the last newline preceding a token in this unwrapped line.
  2173. if (Node.Tok->NewlinesBefore > 0) {
  2174. MinColumnToken = Node.Tok;
  2175. }
  2176. }
  2177. if (PreviousToken && PreviousToken->is(tok::l_brace)) {
  2178. MinColumnToken = PreviousToken;
  2179. }
  2180. return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
  2181. MinColumnToken);
  2182. }
  2183. void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
  2184. bool JustComments = Line->Tokens.empty();
  2185. for (SmallVectorImpl<FormatToken *>::const_iterator
  2186. I = CommentsBeforeNextToken.begin(),
  2187. E = CommentsBeforeNextToken.end();
  2188. I != E; ++I) {
  2189. // Line comments that belong to the same line comment section are put on the
  2190. // same line since later we might want to reflow content between them.
  2191. // Additional fine-grained breaking of line comment sections is controlled
  2192. // by the class BreakableLineCommentSection in case it is desirable to keep
  2193. // several line comment sections in the same unwrapped line.
  2194. //
  2195. // FIXME: Consider putting separate line comment sections as children to the
  2196. // unwrapped line instead.
  2197. (*I)->ContinuesLineCommentSection =
  2198. continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
  2199. if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
  2200. addUnwrappedLine();
  2201. pushToken(*I);
  2202. }
  2203. if (NewlineBeforeNext && JustComments)
  2204. addUnwrappedLine();
  2205. CommentsBeforeNextToken.clear();
  2206. }
  2207. void UnwrappedLineParser::nextToken(int LevelDifference) {
  2208. if (eof())
  2209. return;
  2210. flushComments(isOnNewLine(*FormatTok));
  2211. pushToken(FormatTok);
  2212. FormatToken *Previous = FormatTok;
  2213. if (Style.Language != FormatStyle::LK_JavaScript)
  2214. readToken(LevelDifference);
  2215. else
  2216. readTokenWithJavaScriptASI();
  2217. FormatTok->Previous = Previous;
  2218. }
  2219. void UnwrappedLineParser::distributeComments(
  2220. const SmallVectorImpl<FormatToken *> &Comments,
  2221. const FormatToken *NextTok) {
  2222. // Whether or not a line comment token continues a line is controlled by
  2223. // the method continuesLineCommentSection, with the following caveat:
  2224. //
  2225. // Define a trail of Comments to be a nonempty proper postfix of Comments such
  2226. // that each comment line from the trail is aligned with the next token, if
  2227. // the next token exists. If a trail exists, the beginning of the maximal
  2228. // trail is marked as a start of a new comment section.
  2229. //
  2230. // For example in this code:
  2231. //
  2232. // int a; // line about a
  2233. // // line 1 about b
  2234. // // line 2 about b
  2235. // int b;
  2236. //
  2237. // the two lines about b form a maximal trail, so there are two sections, the
  2238. // first one consisting of the single comment "// line about a" and the
  2239. // second one consisting of the next two comments.
  2240. if (Comments.empty())
  2241. return;
  2242. bool ShouldPushCommentsInCurrentLine = true;
  2243. bool HasTrailAlignedWithNextToken = false;
  2244. unsigned StartOfTrailAlignedWithNextToken = 0;
  2245. if (NextTok) {
  2246. // We are skipping the first element intentionally.
  2247. for (unsigned i = Comments.size() - 1; i > 0; --i) {
  2248. if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
  2249. HasTrailAlignedWithNextToken = true;
  2250. StartOfTrailAlignedWithNextToken = i;
  2251. }
  2252. }
  2253. }
  2254. for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
  2255. FormatToken *FormatTok = Comments[i];
  2256. if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
  2257. FormatTok->ContinuesLineCommentSection = false;
  2258. } else {
  2259. FormatTok->ContinuesLineCommentSection =
  2260. continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
  2261. }
  2262. if (!FormatTok->ContinuesLineCommentSection &&
  2263. (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
  2264. ShouldPushCommentsInCurrentLine = false;
  2265. }
  2266. if (ShouldPushCommentsInCurrentLine) {
  2267. pushToken(FormatTok);
  2268. } else {
  2269. CommentsBeforeNextToken.push_back(FormatTok);
  2270. }
  2271. }
  2272. }
  2273. void UnwrappedLineParser::readToken(int LevelDifference) {
  2274. SmallVector<FormatToken *, 1> Comments;
  2275. do {
  2276. FormatTok = Tokens->getNextToken();
  2277. assert(FormatTok);
  2278. while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
  2279. (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
  2280. distributeComments(Comments, FormatTok);
  2281. Comments.clear();
  2282. // If there is an unfinished unwrapped line, we flush the preprocessor
  2283. // directives only after that unwrapped line was finished later.
  2284. bool SwitchToPreprocessorLines = !Line->Tokens.empty();
  2285. ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
  2286. assert((LevelDifference >= 0 ||
  2287. static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
  2288. "LevelDifference makes Line->Level negative");
  2289. Line->Level += LevelDifference;
  2290. // Comments stored before the preprocessor directive need to be output
  2291. // before the preprocessor directive, at the same level as the
  2292. // preprocessor directive, as we consider them to apply to the directive.
  2293. flushComments(isOnNewLine(*FormatTok));
  2294. parsePPDirective();
  2295. }
  2296. while (FormatTok->Type == TT_ConflictStart ||
  2297. FormatTok->Type == TT_ConflictEnd ||
  2298. FormatTok->Type == TT_ConflictAlternative) {
  2299. if (FormatTok->Type == TT_ConflictStart) {
  2300. conditionalCompilationStart(/*Unreachable=*/false);
  2301. } else if (FormatTok->Type == TT_ConflictAlternative) {
  2302. conditionalCompilationAlternative();
  2303. } else if (FormatTok->Type == TT_ConflictEnd) {
  2304. conditionalCompilationEnd();
  2305. }
  2306. FormatTok = Tokens->getNextToken();
  2307. FormatTok->MustBreakBefore = true;
  2308. }
  2309. if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
  2310. !Line->InPPDirective) {
  2311. continue;
  2312. }
  2313. if (!FormatTok->Tok.is(tok::comment)) {
  2314. distributeComments(Comments, FormatTok);
  2315. Comments.clear();
  2316. return;
  2317. }
  2318. Comments.push_back(FormatTok);
  2319. } while (!eof());
  2320. distributeComments(Comments, nullptr);
  2321. Comments.clear();
  2322. }
  2323. void UnwrappedLineParser::pushToken(FormatToken *Tok) {
  2324. Line->Tokens.push_back(UnwrappedLineNode(Tok));
  2325. if (MustBreakBeforeNextToken) {
  2326. Line->Tokens.back().Tok->MustBreakBefore = true;
  2327. MustBreakBeforeNextToken = false;
  2328. }
  2329. }
  2330. } // end namespace format
  2331. } // end namespace clang