UnwrappedLineParser.cpp 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755
  1. //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file contains the implementation of the UnwrappedLineParser,
  11. /// which turns a stream of tokens into UnwrappedLines.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #include "UnwrappedLineParser.h"
  15. #include "llvm/ADT/STLExtras.h"
  16. #include "llvm/Support/Debug.h"
  17. #include "llvm/Support/raw_ostream.h"
  18. #include <algorithm>
  19. #define DEBUG_TYPE "format-parser"
  20. namespace clang {
  21. namespace format {
  22. class FormatTokenSource {
  23. public:
  24. virtual ~FormatTokenSource() {}
  25. virtual FormatToken *getNextToken() = 0;
  26. virtual unsigned getPosition() = 0;
  27. virtual FormatToken *setPosition(unsigned Position) = 0;
  28. };
  29. namespace {
  30. class ScopedDeclarationState {
  31. public:
  32. ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  33. bool MustBeDeclaration)
  34. : Line(Line), Stack(Stack) {
  35. Line.MustBeDeclaration = MustBeDeclaration;
  36. Stack.push_back(MustBeDeclaration);
  37. }
  38. ~ScopedDeclarationState() {
  39. Stack.pop_back();
  40. if (!Stack.empty())
  41. Line.MustBeDeclaration = Stack.back();
  42. else
  43. Line.MustBeDeclaration = true;
  44. }
  45. private:
  46. UnwrappedLine &Line;
  47. std::vector<bool> &Stack;
  48. };
  49. static bool isLineComment(const FormatToken &FormatTok) {
  50. return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
  51. }
  52. // Checks if \p FormatTok is a line comment that continues the line comment
  53. // \p Previous. The original column of \p MinColumnToken is used to determine
  54. // whether \p FormatTok is indented enough to the right to continue \p Previous.
  55. static bool continuesLineComment(const FormatToken &FormatTok,
  56. const FormatToken *Previous,
  57. const FormatToken *MinColumnToken) {
  58. if (!Previous || !MinColumnToken)
  59. return false;
  60. unsigned MinContinueColumn =
  61. MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  62. return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
  63. isLineComment(*Previous) &&
  64. FormatTok.OriginalColumn >= MinContinueColumn;
  65. }
  66. class ScopedMacroState : public FormatTokenSource {
  67. public:
  68. ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  69. FormatToken *&ResetToken)
  70. : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  71. PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  72. Token(nullptr), PreviousToken(nullptr) {
  73. FakeEOF.Tok.startToken();
  74. FakeEOF.Tok.setKind(tok::eof);
  75. TokenSource = this;
  76. Line.Level = 0;
  77. Line.InPPDirective = true;
  78. }
  79. ~ScopedMacroState() override {
  80. TokenSource = PreviousTokenSource;
  81. ResetToken = Token;
  82. Line.InPPDirective = false;
  83. Line.Level = PreviousLineLevel;
  84. }
  85. FormatToken *getNextToken() override {
  86. // The \c UnwrappedLineParser guards against this by never calling
  87. // \c getNextToken() after it has encountered the first eof token.
  88. assert(!eof());
  89. PreviousToken = Token;
  90. Token = PreviousTokenSource->getNextToken();
  91. if (eof())
  92. return &FakeEOF;
  93. return Token;
  94. }
  95. unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  96. FormatToken *setPosition(unsigned Position) override {
  97. PreviousToken = nullptr;
  98. Token = PreviousTokenSource->setPosition(Position);
  99. return Token;
  100. }
  101. private:
  102. bool eof() {
  103. return Token && Token->HasUnescapedNewline &&
  104. !continuesLineComment(*Token, PreviousToken,
  105. /*MinColumnToken=*/PreviousToken);
  106. }
  107. FormatToken FakeEOF;
  108. UnwrappedLine &Line;
  109. FormatTokenSource *&TokenSource;
  110. FormatToken *&ResetToken;
  111. unsigned PreviousLineLevel;
  112. FormatTokenSource *PreviousTokenSource;
  113. FormatToken *Token;
  114. FormatToken *PreviousToken;
  115. };
  116. } // end anonymous namespace
  117. class ScopedLineState {
  118. public:
  119. ScopedLineState(UnwrappedLineParser &Parser,
  120. bool SwitchToPreprocessorLines = false)
  121. : Parser(Parser), OriginalLines(Parser.CurrentLines) {
  122. if (SwitchToPreprocessorLines)
  123. Parser.CurrentLines = &Parser.PreprocessorDirectives;
  124. else if (!Parser.Line->Tokens.empty())
  125. Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
  126. PreBlockLine = std::move(Parser.Line);
  127. Parser.Line = llvm::make_unique<UnwrappedLine>();
  128. Parser.Line->Level = PreBlockLine->Level;
  129. Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
  130. }
  131. ~ScopedLineState() {
  132. if (!Parser.Line->Tokens.empty()) {
  133. Parser.addUnwrappedLine();
  134. }
  135. assert(Parser.Line->Tokens.empty());
  136. Parser.Line = std::move(PreBlockLine);
  137. if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
  138. Parser.MustBreakBeforeNextToken = true;
  139. Parser.CurrentLines = OriginalLines;
  140. }
  141. private:
  142. UnwrappedLineParser &Parser;
  143. std::unique_ptr<UnwrappedLine> PreBlockLine;
  144. SmallVectorImpl<UnwrappedLine> *OriginalLines;
  145. };
  146. class CompoundStatementIndenter {
  147. public:
  148. CompoundStatementIndenter(UnwrappedLineParser *Parser,
  149. const FormatStyle &Style, unsigned &LineLevel)
  150. : CompoundStatementIndenter(Parser, LineLevel,
  151. Style.BraceWrapping.AfterControlStatement,
  152. Style.BraceWrapping.IndentBraces) {}
  153. CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
  154. bool WrapBrace, bool IndentBrace)
  155. : LineLevel(LineLevel), OldLineLevel(LineLevel) {
  156. if (WrapBrace)
  157. Parser->addUnwrappedLine();
  158. if (IndentBrace)
  159. ++LineLevel;
  160. }
  161. ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
  162. private:
  163. unsigned &LineLevel;
  164. unsigned OldLineLevel;
  165. };
  166. namespace {
  167. class IndexedTokenSource : public FormatTokenSource {
  168. public:
  169. IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
  170. : Tokens(Tokens), Position(-1) {}
  171. FormatToken *getNextToken() override {
  172. ++Position;
  173. return Tokens[Position];
  174. }
  175. unsigned getPosition() override {
  176. assert(Position >= 0);
  177. return Position;
  178. }
  179. FormatToken *setPosition(unsigned P) override {
  180. Position = P;
  181. return Tokens[Position];
  182. }
  183. void reset() { Position = -1; }
  184. private:
  185. ArrayRef<FormatToken *> Tokens;
  186. int Position;
  187. };
  188. } // end anonymous namespace
  189. UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
  190. const AdditionalKeywords &Keywords,
  191. unsigned FirstStartColumn,
  192. ArrayRef<FormatToken *> Tokens,
  193. UnwrappedLineConsumer &Callback)
  194. : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
  195. CurrentLines(&Lines), Style(Style), Keywords(Keywords),
  196. CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
  197. Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
  198. IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
  199. ? IG_Rejected
  200. : IG_Inited),
  201. IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
  202. void UnwrappedLineParser::reset() {
  203. PPBranchLevel = -1;
  204. IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
  205. ? IG_Rejected
  206. : IG_Inited;
  207. IncludeGuardToken = nullptr;
  208. Line.reset(new UnwrappedLine);
  209. CommentsBeforeNextToken.clear();
  210. FormatTok = nullptr;
  211. MustBreakBeforeNextToken = false;
  212. PreprocessorDirectives.clear();
  213. CurrentLines = &Lines;
  214. DeclarationScopeStack.clear();
  215. PPStack.clear();
  216. Line->FirstStartColumn = FirstStartColumn;
  217. }
  218. void UnwrappedLineParser::parse() {
  219. IndexedTokenSource TokenSource(AllTokens);
  220. Line->FirstStartColumn = FirstStartColumn;
  221. do {
  222. LLVM_DEBUG(llvm::dbgs() << "----\n");
  223. reset();
  224. Tokens = &TokenSource;
  225. TokenSource.reset();
  226. readToken();
  227. parseFile();
  228. // If we found an include guard then all preprocessor directives (other than
  229. // the guard) are over-indented by one.
  230. if (IncludeGuard == IG_Found)
  231. for (auto &Line : Lines)
  232. if (Line.InPPDirective && Line.Level > 0)
  233. --Line.Level;
  234. // Create line with eof token.
  235. pushToken(FormatTok);
  236. addUnwrappedLine();
  237. for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
  238. E = Lines.end();
  239. I != E; ++I) {
  240. Callback.consumeUnwrappedLine(*I);
  241. }
  242. Callback.finishRun();
  243. Lines.clear();
  244. while (!PPLevelBranchIndex.empty() &&
  245. PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
  246. PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
  247. PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
  248. }
  249. if (!PPLevelBranchIndex.empty()) {
  250. ++PPLevelBranchIndex.back();
  251. assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
  252. assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
  253. }
  254. } while (!PPLevelBranchIndex.empty());
  255. }
  256. void UnwrappedLineParser::parseFile() {
  257. // The top-level context in a file always has declarations, except for pre-
  258. // processor directives and JavaScript files.
  259. bool MustBeDeclaration =
  260. !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
  261. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  262. MustBeDeclaration);
  263. if (Style.Language == FormatStyle::LK_TextProto)
  264. parseBracedList();
  265. else
  266. parseLevel(/*HasOpeningBrace=*/false);
  267. // Make sure to format the remaining tokens.
  268. //
  269. // LK_TextProto is special since its top-level is parsed as the body of a
  270. // braced list, which does not necessarily have natural line separators such
  271. // as a semicolon. Comments after the last entry that have been determined to
  272. // not belong to that line, as in:
  273. // key: value
  274. // // endfile comment
  275. // do not have a chance to be put on a line of their own until this point.
  276. // Here we add this newline before end-of-file comments.
  277. if (Style.Language == FormatStyle::LK_TextProto &&
  278. !CommentsBeforeNextToken.empty())
  279. addUnwrappedLine();
  280. flushComments(true);
  281. addUnwrappedLine();
  282. }
  283. void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  284. bool SwitchLabelEncountered = false;
  285. do {
  286. tok::TokenKind kind = FormatTok->Tok.getKind();
  287. if (FormatTok->Type == TT_MacroBlockBegin) {
  288. kind = tok::l_brace;
  289. } else if (FormatTok->Type == TT_MacroBlockEnd) {
  290. kind = tok::r_brace;
  291. }
  292. switch (kind) {
  293. case tok::comment:
  294. nextToken();
  295. addUnwrappedLine();
  296. break;
  297. case tok::l_brace:
  298. // FIXME: Add parameter whether this can happen - if this happens, we must
  299. // be in a non-declaration context.
  300. if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
  301. continue;
  302. parseBlock(/*MustBeDeclaration=*/false);
  303. addUnwrappedLine();
  304. break;
  305. case tok::r_brace:
  306. if (HasOpeningBrace)
  307. return;
  308. nextToken();
  309. addUnwrappedLine();
  310. break;
  311. case tok::kw_default: {
  312. unsigned StoredPosition = Tokens->getPosition();
  313. FormatToken *Next;
  314. do {
  315. Next = Tokens->getNextToken();
  316. } while (Next && Next->is(tok::comment));
  317. FormatTok = Tokens->setPosition(StoredPosition);
  318. if (Next && Next->isNot(tok::colon)) {
  319. // default not followed by ':' is not a case label; treat it like
  320. // an identifier.
  321. parseStructuralElement();
  322. break;
  323. }
  324. // Else, if it is 'default:', fall through to the case handling.
  325. LLVM_FALLTHROUGH;
  326. }
  327. case tok::kw_case:
  328. if (Style.Language == FormatStyle::LK_JavaScript &&
  329. Line->MustBeDeclaration) {
  330. // A 'case: string' style field declaration.
  331. parseStructuralElement();
  332. break;
  333. }
  334. if (!SwitchLabelEncountered &&
  335. (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
  336. ++Line->Level;
  337. SwitchLabelEncountered = true;
  338. parseStructuralElement();
  339. break;
  340. default:
  341. parseStructuralElement();
  342. break;
  343. }
  344. } while (!eof());
  345. }
  346. void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  347. // We'll parse forward through the tokens until we hit
  348. // a closing brace or eof - note that getNextToken() will
  349. // parse macros, so this will magically work inside macro
  350. // definitions, too.
  351. unsigned StoredPosition = Tokens->getPosition();
  352. FormatToken *Tok = FormatTok;
  353. const FormatToken *PrevTok = Tok->Previous;
  354. // Keep a stack of positions of lbrace tokens. We will
  355. // update information about whether an lbrace starts a
  356. // braced init list or a different block during the loop.
  357. SmallVector<FormatToken *, 8> LBraceStack;
  358. assert(Tok->Tok.is(tok::l_brace));
  359. do {
  360. // Get next non-comment token.
  361. FormatToken *NextTok;
  362. unsigned ReadTokens = 0;
  363. do {
  364. NextTok = Tokens->getNextToken();
  365. ++ReadTokens;
  366. } while (NextTok->is(tok::comment));
  367. switch (Tok->Tok.getKind()) {
  368. case tok::l_brace:
  369. if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
  370. if (PrevTok->isOneOf(tok::colon, tok::less))
  371. // A ':' indicates this code is in a type, or a braced list
  372. // following a label in an object literal ({a: {b: 1}}).
  373. // A '<' could be an object used in a comparison, but that is nonsense
  374. // code (can never return true), so more likely it is a generic type
  375. // argument (`X<{a: string; b: number}>`).
  376. // The code below could be confused by semicolons between the
  377. // individual members in a type member list, which would normally
  378. // trigger BK_Block. In both cases, this must be parsed as an inline
  379. // braced init.
  380. Tok->BlockKind = BK_BracedInit;
  381. else if (PrevTok->is(tok::r_paren))
  382. // `) { }` can only occur in function or method declarations in JS.
  383. Tok->BlockKind = BK_Block;
  384. } else {
  385. Tok->BlockKind = BK_Unknown;
  386. }
  387. LBraceStack.push_back(Tok);
  388. break;
  389. case tok::r_brace:
  390. if (LBraceStack.empty())
  391. break;
  392. if (LBraceStack.back()->BlockKind == BK_Unknown) {
  393. bool ProbablyBracedList = false;
  394. if (Style.Language == FormatStyle::LK_Proto) {
  395. ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
  396. } else {
  397. // Using OriginalColumn to distinguish between ObjC methods and
  398. // binary operators is a bit hacky.
  399. bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
  400. NextTok->OriginalColumn == 0;
  401. // If there is a comma, semicolon or right paren after the closing
  402. // brace, we assume this is a braced initializer list. Note that
  403. // regardless how we mark inner braces here, we will overwrite the
  404. // BlockKind later if we parse a braced list (where all blocks
  405. // inside are by default braced lists), or when we explicitly detect
  406. // blocks (for example while parsing lambdas).
  407. // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
  408. // braced list in JS.
  409. ProbablyBracedList =
  410. (Style.Language == FormatStyle::LK_JavaScript &&
  411. NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
  412. Keywords.kw_as)) ||
  413. (Style.isCpp() && NextTok->is(tok::l_paren)) ||
  414. NextTok->isOneOf(tok::comma, tok::period, tok::colon,
  415. tok::r_paren, tok::r_square, tok::l_brace,
  416. tok::ellipsis) ||
  417. (NextTok->is(tok::identifier) &&
  418. !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
  419. (NextTok->is(tok::semi) &&
  420. (!ExpectClassBody || LBraceStack.size() != 1)) ||
  421. (NextTok->isBinaryOperator() && !NextIsObjCMethod);
  422. if (NextTok->is(tok::l_square)) {
  423. // We can have an array subscript after a braced init
  424. // list, but C++11 attributes are expected after blocks.
  425. NextTok = Tokens->getNextToken();
  426. ++ReadTokens;
  427. ProbablyBracedList = NextTok->isNot(tok::l_square);
  428. }
  429. }
  430. if (ProbablyBracedList) {
  431. Tok->BlockKind = BK_BracedInit;
  432. LBraceStack.back()->BlockKind = BK_BracedInit;
  433. } else {
  434. Tok->BlockKind = BK_Block;
  435. LBraceStack.back()->BlockKind = BK_Block;
  436. }
  437. }
  438. LBraceStack.pop_back();
  439. break;
  440. case tok::identifier:
  441. if (!Tok->is(TT_StatementMacro))
  442. break;
  443. LLVM_FALLTHROUGH;
  444. case tok::at:
  445. case tok::semi:
  446. case tok::kw_if:
  447. case tok::kw_while:
  448. case tok::kw_for:
  449. case tok::kw_switch:
  450. case tok::kw_try:
  451. case tok::kw___try:
  452. if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
  453. LBraceStack.back()->BlockKind = BK_Block;
  454. break;
  455. default:
  456. break;
  457. }
  458. PrevTok = Tok;
  459. Tok = NextTok;
  460. } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
  461. // Assume other blocks for all unclosed opening braces.
  462. for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
  463. if (LBraceStack[i]->BlockKind == BK_Unknown)
  464. LBraceStack[i]->BlockKind = BK_Block;
  465. }
  466. FormatTok = Tokens->setPosition(StoredPosition);
  467. }
  // Folds the std::hash of \p v into \p seed in place.
  template <class T>
  static inline void hash_combine(std::size_t &seed, const T &v) {
    const std::size_t Mixed =
        std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    seed ^= Mixed;
  }
  473. size_t UnwrappedLineParser::computePPHash() const {
  474. size_t h = 0;
  475. for (const auto &i : PPStack) {
  476. hash_combine(h, size_t(i.Kind));
  477. hash_combine(h, i.Line);
  478. }
  479. return h;
  480. }
  481. void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
  482. bool MunchSemi) {
  483. assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
  484. "'{' or macro block token expected");
  485. const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  486. FormatTok->BlockKind = BK_Block;
  487. size_t PPStartHash = computePPHash();
  488. unsigned InitialLevel = Line->Level;
  489. nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
  490. if (MacroBlock && FormatTok->is(tok::l_paren))
  491. parseParens();
  492. size_t NbPreprocessorDirectives =
  493. CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  494. addUnwrappedLine();
  495. size_t OpeningLineIndex =
  496. CurrentLines->empty()
  497. ? (UnwrappedLine::kInvalidIndex)
  498. : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
  499. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  500. MustBeDeclaration);
  501. if (AddLevel)
  502. ++Line->Level;
  503. parseLevel(/*HasOpeningBrace=*/true);
  504. if (eof())
  505. return;
  506. if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
  507. : !FormatTok->is(tok::r_brace)) {
  508. Line->Level = InitialLevel;
  509. FormatTok->BlockKind = BK_Block;
  510. return;
  511. }
  512. size_t PPEndHash = computePPHash();
  513. // Munch the closing brace.
  514. nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
  515. if (MacroBlock && FormatTok->is(tok::l_paren))
  516. parseParens();
  517. if (MunchSemi && FormatTok->Tok.is(tok::semi))
  518. nextToken();
  519. Line->Level = InitialLevel;
  520. if (PPStartHash == PPEndHash) {
  521. Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
  522. if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
  523. // Update the opening line to add the forward reference as well
  524. (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
  525. CurrentLines->size() - 1;
  526. }
  527. }
  528. }
  529. static bool isGoogScope(const UnwrappedLine &Line) {
  530. // FIXME: Closure-library specific stuff should not be hard-coded but be
  531. // configurable.
  532. if (Line.Tokens.size() < 4)
  533. return false;
  534. auto I = Line.Tokens.begin();
  535. if (I->Tok->TokenText != "goog")
  536. return false;
  537. ++I;
  538. if (I->Tok->isNot(tok::period))
  539. return false;
  540. ++I;
  541. if (I->Tok->TokenText != "scope")
  542. return false;
  543. ++I;
  544. return I->Tok->is(tok::l_paren);
  545. }
  546. static bool isIIFE(const UnwrappedLine &Line,
  547. const AdditionalKeywords &Keywords) {
  548. // Look for the start of an immediately invoked anonymous function.
  549. // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  550. // This is commonly done in JavaScript to create a new, anonymous scope.
  551. // Example: (function() { ... })()
  552. if (Line.Tokens.size() < 3)
  553. return false;
  554. auto I = Line.Tokens.begin();
  555. if (I->Tok->isNot(tok::l_paren))
  556. return false;
  557. ++I;
  558. if (I->Tok->isNot(Keywords.kw_function))
  559. return false;
  560. ++I;
  561. return I->Tok->is(tok::l_paren);
  562. }
  563. static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
  564. const FormatToken &InitialToken) {
  565. if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
  566. return Style.BraceWrapping.AfterNamespace;
  567. if (InitialToken.is(tok::kw_class))
  568. return Style.BraceWrapping.AfterClass;
  569. if (InitialToken.is(tok::kw_union))
  570. return Style.BraceWrapping.AfterUnion;
  571. if (InitialToken.is(tok::kw_struct))
  572. return Style.BraceWrapping.AfterStruct;
  573. return false;
  574. }
  575. void UnwrappedLineParser::parseChildBlock() {
  576. FormatTok->BlockKind = BK_Block;
  577. nextToken();
  578. {
  579. bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
  580. (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
  581. ScopedLineState LineState(*this);
  582. ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
  583. /*MustBeDeclaration=*/false);
  584. Line->Level += SkipIndent ? 0 : 1;
  585. parseLevel(/*HasOpeningBrace=*/true);
  586. flushComments(isOnNewLine(*FormatTok));
  587. Line->Level -= SkipIndent ? 0 : 1;
  588. }
  589. nextToken();
  590. }
  591. void UnwrappedLineParser::parsePPDirective() {
  592. assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
  593. ScopedMacroState MacroState(*Line, Tokens, FormatTok);
  594. nextToken();
  595. if (!FormatTok->Tok.getIdentifierInfo()) {
  596. parsePPUnknown();
  597. return;
  598. }
  599. switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  600. case tok::pp_define:
  601. parsePPDefine();
  602. return;
  603. case tok::pp_if:
  604. parsePPIf(/*IfDef=*/false);
  605. break;
  606. case tok::pp_ifdef:
  607. case tok::pp_ifndef:
  608. parsePPIf(/*IfDef=*/true);
  609. break;
  610. case tok::pp_else:
  611. parsePPElse();
  612. break;
  613. case tok::pp_elif:
  614. parsePPElIf();
  615. break;
  616. case tok::pp_endif:
  617. parsePPEndIf();
  618. break;
  619. default:
  620. parsePPUnknown();
  621. break;
  622. }
  623. }
  624. void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  625. size_t Line = CurrentLines->size();
  626. if (CurrentLines == &PreprocessorDirectives)
  627. Line += Lines.size();
  628. if (Unreachable ||
  629. (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
  630. PPStack.push_back({PP_Unreachable, Line});
  631. else
  632. PPStack.push_back({PP_Conditional, Line});
  633. }
  634. void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  635. ++PPBranchLevel;
  636. assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  637. if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
  638. PPLevelBranchIndex.push_back(0);
  639. PPLevelBranchCount.push_back(0);
  640. }
  641. PPChainBranchIndex.push(0);
  642. bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  643. conditionalCompilationCondition(Unreachable || Skip);
  644. }
  645. void UnwrappedLineParser::conditionalCompilationAlternative() {
  646. if (!PPStack.empty())
  647. PPStack.pop_back();
  648. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  649. if (!PPChainBranchIndex.empty())
  650. ++PPChainBranchIndex.top();
  651. conditionalCompilationCondition(
  652. PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
  653. PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
  654. }
  655. void UnwrappedLineParser::conditionalCompilationEnd() {
  656. assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  657. if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
  658. if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
  659. PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  660. }
  661. }
  662. // Guard against #endif's without #if.
  663. if (PPBranchLevel > -1)
  664. --PPBranchLevel;
  665. if (!PPChainBranchIndex.empty())
  666. PPChainBranchIndex.pop();
  667. if (!PPStack.empty())
  668. PPStack.pop_back();
  669. }
  670. void UnwrappedLineParser::parsePPIf(bool IfDef) {
  671. bool IfNDef = FormatTok->is(tok::pp_ifndef);
  672. nextToken();
  673. bool Unreachable = false;
  674. if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
  675. Unreachable = true;
  676. if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
  677. Unreachable = true;
  678. conditionalCompilationStart(Unreachable);
  679. FormatToken *IfCondition = FormatTok;
  680. // If there's a #ifndef on the first line, and the only lines before it are
  681. // comments, it could be an include guard.
  682. bool MaybeIncludeGuard = IfNDef;
  683. if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
  684. for (auto &Line : Lines) {
  685. if (!Line.Tokens.front().Tok->is(tok::comment)) {
  686. MaybeIncludeGuard = false;
  687. IncludeGuard = IG_Rejected;
  688. break;
  689. }
  690. }
  691. --PPBranchLevel;
  692. parsePPUnknown();
  693. ++PPBranchLevel;
  694. if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
  695. IncludeGuard = IG_IfNdefed;
  696. IncludeGuardToken = IfCondition;
  697. }
  698. }
  699. void UnwrappedLineParser::parsePPElse() {
  700. // If a potential include guard has an #else, it's not an include guard.
  701. if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
  702. IncludeGuard = IG_Rejected;
  703. conditionalCompilationAlternative();
  704. if (PPBranchLevel > -1)
  705. --PPBranchLevel;
  706. parsePPUnknown();
  707. ++PPBranchLevel;
  708. }
  709. void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
  710. void UnwrappedLineParser::parsePPEndIf() {
  711. conditionalCompilationEnd();
  712. parsePPUnknown();
  713. // If the #endif of a potential include guard is the last thing in the file,
  714. // then we found an include guard.
  715. unsigned TokenPosition = Tokens->getPosition();
  716. FormatToken *PeekNext = AllTokens[TokenPosition];
  717. if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
  718. PeekNext->is(tok::eof) &&
  719. Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  720. IncludeGuard = IG_Found;
  721. }
  722. void UnwrappedLineParser::parsePPDefine() {
  723. nextToken();
  724. if (!FormatTok->Tok.getIdentifierInfo()) {
  725. IncludeGuard = IG_Rejected;
  726. IncludeGuardToken = nullptr;
  727. parsePPUnknown();
  728. return;
  729. }
  730. if (IncludeGuard == IG_IfNdefed &&
  731. IncludeGuardToken->TokenText == FormatTok->TokenText) {
  732. IncludeGuard = IG_Defined;
  733. IncludeGuardToken = nullptr;
  734. for (auto &Line : Lines) {
  735. if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
  736. IncludeGuard = IG_Rejected;
  737. break;
  738. }
  739. }
  740. }
  741. nextToken();
  742. if (FormatTok->Tok.getKind() == tok::l_paren &&
  743. FormatTok->WhitespaceRange.getBegin() ==
  744. FormatTok->WhitespaceRange.getEnd()) {
  745. parseParens();
  746. }
  747. if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  748. Line->Level += PPBranchLevel + 1;
  749. addUnwrappedLine();
  750. ++Line->Level;
  751. // Errors during a preprocessor directive can only affect the layout of the
  752. // preprocessor directive, and thus we ignore them. An alternative approach
  753. // would be to use the same approach we use on the file level (no
  754. // re-indentation if there was a structural error) within the macro
  755. // definition.
  756. parseFile();
  757. }
  758. void UnwrappedLineParser::parsePPUnknown() {
  759. do {
  760. nextToken();
  761. } while (!eof());
  762. if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
  763. Line->Level += PPBranchLevel + 1;
  764. addUnwrappedLine();
  765. }
  766. // Here we blacklist certain tokens that are not usually the first token in an
  767. // unwrapped line. This is used in attempt to distinguish macro calls without
  768. // trailing semicolons from other constructs split to several lines.
  769. static bool tokenCanStartNewLine(const clang::Token &Tok) {
  770. // Semicolon can be a null-statement, l_square can be a start of a macro or
  771. // a C++11 attribute, but this doesn't seem to be common.
  772. return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
  773. Tok.isNot(tok::l_square) &&
  774. // Tokens that can only be used as binary operators and a part of
  775. // overloaded operator names.
  776. Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
  777. Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
  778. Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
  779. Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
  780. Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
  781. Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
  782. Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
  783. Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
  784. Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
  785. Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
  786. Tok.isNot(tok::lesslessequal) &&
  787. // Colon is used in labels, base class lists, initializer lists,
  788. // range-based for loops, ternary operator, but should never be the
  789. // first token in an unwrapped line.
  790. Tok.isNot(tok::colon) &&
  791. // 'noexcept' is a trailing annotation.
  792. Tok.isNot(tok::kw_noexcept);
  793. }
  794. static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
  795. const FormatToken *FormatTok) {
  796. // FIXME: This returns true for C/C++ keywords like 'struct'.
  797. return FormatTok->is(tok::identifier) &&
  798. (FormatTok->Tok.getIdentifierInfo() == nullptr ||
  799. !FormatTok->isOneOf(
  800. Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
  801. Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
  802. Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
  803. Keywords.kw_let, Keywords.kw_var, tok::kw_const,
  804. Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
  805. Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
  806. Keywords.kw_from));
  807. }
  808. static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
  809. const FormatToken *FormatTok) {
  810. return FormatTok->Tok.isLiteral() ||
  811. FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
  812. mustBeJSIdent(Keywords, FormatTok);
  813. }
  814. // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
  815. // when encountered after a value (see mustBeJSIdentOrValue).
  816. static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
  817. const FormatToken *FormatTok) {
  818. return FormatTok->isOneOf(
  819. tok::kw_return, Keywords.kw_yield,
  820. // conditionals
  821. tok::kw_if, tok::kw_else,
  822. // loops
  823. tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
  824. // switch/case
  825. tok::kw_switch, tok::kw_case,
  826. // exceptions
  827. tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
  828. // declaration
  829. tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
  830. Keywords.kw_async, Keywords.kw_function,
  831. // import/export
  832. Keywords.kw_import, tok::kw_export);
  833. }
  834. // readTokenWithJavaScriptASI reads the next token and terminates the current
  835. // line if JavaScript Automatic Semicolon Insertion must
  836. // happen between the current token and the next token.
  837. //
  838. // This method is conservative - it cannot cover all edge cases of JavaScript,
  839. // but only aims to correctly handle certain well known cases. It *must not*
  840. // return true in speculative cases.
  841. void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  842. FormatToken *Previous = FormatTok;
  843. readToken();
  844. FormatToken *Next = FormatTok;
  845. bool IsOnSameLine =
  846. CommentsBeforeNextToken.empty()
  847. ? Next->NewlinesBefore == 0
  848. : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  849. if (IsOnSameLine)
  850. return;
  851. bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  852. bool PreviousStartsTemplateExpr =
  853. Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  854. if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
  855. // If the line contains an '@' sign, the previous token might be an
  856. // annotation, which can precede another identifier/value.
  857. bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
  858. [](UnwrappedLineNode &LineNode) {
  859. return LineNode.Tok->is(tok::at);
  860. }) != Line->Tokens.end();
  861. if (HasAt)
  862. return;
  863. }
  864. if (Next->is(tok::exclaim) && PreviousMustBeValue)
  865. return addUnwrappedLine();
  866. bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  867. bool NextEndsTemplateExpr =
  868. Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  869. if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
  870. (PreviousMustBeValue ||
  871. Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
  872. tok::minusminus)))
  873. return addUnwrappedLine();
  874. if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
  875. isJSDeclOrStmt(Keywords, Next))
  876. return addUnwrappedLine();
  877. }
  // Parses a single structural element starting at the current token.
  //
  // The first switch dispatches on the kind of the leading token: cases that
  // `return` hand the whole element to a dedicated parser (namespace, if,
  // loops, switch, labels, try/catch, extern blocks, ...), while cases that
  // `break` fall through to the generic loop below.
  //
  // The loop then consumes tokens one at a time until eof() is reached or one
  // of its cases emits an unwrapped line and returns.
  //
  // The current token must not be an opening brace on entry (asserted).
  878. void UnwrappedLineParser::parseStructuralElement() {
  879. assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include directive, optionally followed by a string literal,
  // forms a line of its own.
  880. if (Style.Language == FormatStyle::LK_TableGen &&
  881. FormatTok->is(tok::pp_include)) {
  882. nextToken();
  883. if (FormatTok->is(tok::string_literal))
  884. nextToken();
  885. addUnwrappedLine();
  886. return;
  887. }
  // Phase 1: dispatch on the first token of the element.
  888. switch (FormatTok->Tok.getKind()) {
  889. case tok::kw_asm:
  890. nextToken();
  891. if (FormatTok->is(tok::l_brace)) {
  892. FormatTok->Type = TT_InlineASMBrace;
  893. nextToken();
  // Every token between the asm braces is marked Finalized; the closing
  // brace ends the line.
  894. while (FormatTok && FormatTok->isNot(tok::eof)) {
  895. if (FormatTok->is(tok::r_brace)) {
  896. FormatTok->Type = TT_InlineASMBrace;
  897. nextToken();
  898. addUnwrappedLine();
  899. break;
  900. }
  901. FormatTok->Finalized = true;
  902. nextToken();
  903. }
  904. }
  905. break;
  906. case tok::kw_namespace:
  907. parseNamespace();
  908. return;
  909. case tok::kw_public:
  910. case tok::kw_protected:
  911. case tok::kw_private:
  // In Java, JavaScript and C# these are plain modifiers and are simply
  // consumed; otherwise they are parsed as access specifiers.
  912. if (Style.Language == FormatStyle::LK_Java ||
  913. Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
  914. nextToken();
  915. else
  916. parseAccessSpecifier();
  917. return;
  918. case tok::kw_if:
  919. parseIfThenElse();
  920. return;
  921. case tok::kw_for:
  922. case tok::kw_while:
  923. parseForOrWhileLoop();
  924. return;
  925. case tok::kw_do:
  926. parseDoWhile();
  927. return;
  928. case tok::kw_switch:
  929. if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
  930. // 'switch: string' field declaration.
  931. break;
  932. parseSwitch();
  933. return;
  934. case tok::kw_default:
  935. if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
  936. // 'default: string' field declaration.
  937. break;
  938. nextToken();
  939. if (FormatTok->is(tok::colon)) {
  940. parseLabel();
  941. return;
  942. }
  943. // e.g. "default void f() {}" in a Java interface.
  944. break;
  945. case tok::kw_case:
  946. if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
  947. // 'case: string' field declaration.
  948. break;
  949. parseCaseLabel();
  950. return;
  951. case tok::kw_try:
  952. case tok::kw___try:
  953. parseTryCatch();
  954. return;
  955. case tok::kw_extern:
  956. nextToken();
  // Only `extern "<literal>" {` is handled here; the block is indented only
  // when BraceWrapping.AfterExternBlock is set.
  957. if (FormatTok->Tok.is(tok::string_literal)) {
  958. nextToken();
  959. if (FormatTok->Tok.is(tok::l_brace)) {
  960. if (Style.BraceWrapping.AfterExternBlock) {
  961. addUnwrappedLine();
  962. parseBlock(/*MustBeDeclaration=*/true);
  963. } else {
  964. parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
  965. }
  966. addUnwrappedLine();
  967. return;
  968. }
  969. }
  970. break;
  971. case tok::kw_export:
  972. if (Style.Language == FormatStyle::LK_JavaScript) {
  973. parseJavaScriptEs6ImportExport();
  974. return;
  975. }
  976. if (!Style.isCpp())
  977. break;
  978. // Handle C++ "(inline|export) namespace".
  979. LLVM_FALLTHROUGH;
  980. case tok::kw_inline:
  981. nextToken();
  982. if (FormatTok->Tok.is(tok::kw_namespace)) {
  983. parseNamespace();
  984. return;
  985. }
  986. break;
  987. case tok::identifier:
  988. if (FormatTok->is(TT_ForEachMacro)) {
  989. parseForOrWhileLoop();
  990. return;
  991. }
  992. if (FormatTok->is(TT_MacroBlockBegin)) {
  993. parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
  994. /*MunchSemi=*/false);
  995. return;
  996. }
  997. if (FormatTok->is(Keywords.kw_import)) {
  998. if (Style.Language == FormatStyle::LK_JavaScript) {
  999. parseJavaScriptEs6ImportExport();
  1000. return;
  1001. }
  1002. if (Style.Language == FormatStyle::LK_Proto) {
  // Proto: `import [public] "<file>" [;]`. Note that the early return
  // below leaves without adding an unwrapped line when no string literal
  // follows.
  1003. nextToken();
  1004. if (FormatTok->is(tok::kw_public))
  1005. nextToken();
  1006. if (!FormatTok->is(tok::string_literal))
  1007. return;
  1008. nextToken();
  1009. if (FormatTok->is(tok::semi))
  1010. nextToken();
  1011. addUnwrappedLine();
  1012. return;
  1013. }
  1014. }
  // `signals:` / `slots:` style section markers (and their Q_ variants)
  // followed by a colon get a line of their own.
  1015. if (Style.isCpp() &&
  1016. FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
  1017. Keywords.kw_slots, Keywords.kw_qslots)) {
  1018. nextToken();
  1019. if (FormatTok->is(tok::colon)) {
  1020. nextToken();
  1021. addUnwrappedLine();
  1022. return;
  1023. }
  1024. }
  1025. if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
  1026. parseStatementMacro();
  1027. return;
  1028. }
  1029. if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
  1030. parseNamespace();
  1031. return;
  1032. }
  1033. // In all other cases, parse the declaration.
  1034. break;
  1035. default:
  1036. break;
  1037. }
  // Phase 2: generic token loop for declarations and expression statements.
  1038. do {
  1039. const FormatToken *Previous = FormatTok->Previous;
  1040. switch (FormatTok->Tok.getKind()) {
  1041. case tok::at:
  1042. nextToken();
  1043. if (FormatTok->Tok.is(tok::l_brace)) {
  1044. nextToken();
  1045. parseBracedList();
  1046. break;
  1047. } else if (Style.Language == FormatStyle::LK_Java &&
  1048. FormatTok->is(Keywords.kw_interface)) {
  1049. nextToken();
  1050. break;
  1051. }
  // Otherwise dispatch on the Objective-C keyword that follows the '@'.
  1052. switch (FormatTok->Tok.getObjCKeywordID()) {
  1053. case tok::objc_public:
  1054. case tok::objc_protected:
  1055. case tok::objc_package:
  1056. case tok::objc_private:
  1057. return parseAccessSpecifier();
  1058. case tok::objc_interface:
  1059. case tok::objc_implementation:
  1060. return parseObjCInterfaceOrImplementation();
  1061. case tok::objc_protocol:
  1062. if (parseObjCProtocol())
  1063. return;
  1064. break;
  1065. case tok::objc_end:
  1066. return; // Handled by the caller.
  1067. case tok::objc_optional:
  1068. case tok::objc_required:
  1069. nextToken();
  1070. addUnwrappedLine();
  1071. return;
  1072. case tok::objc_autoreleasepool:
  1073. nextToken();
  1074. if (FormatTok->Tok.is(tok::l_brace)) {
  1075. if (Style.BraceWrapping.AfterControlStatement)
  1076. addUnwrappedLine();
  1077. parseBlock(/*MustBeDeclaration=*/false);
  1078. }
  1079. addUnwrappedLine();
  1080. return;
  1081. case tok::objc_synchronized:
  1082. nextToken();
  1083. if (FormatTok->Tok.is(tok::l_paren))
  1084. // Skip synchronization object
  1085. parseParens();
  1086. if (FormatTok->Tok.is(tok::l_brace)) {
  1087. if (Style.BraceWrapping.AfterControlStatement)
  1088. addUnwrappedLine();
  1089. parseBlock(/*MustBeDeclaration=*/false);
  1090. }
  1091. addUnwrappedLine();
  1092. return;
  1093. case tok::objc_try:
  1094. // This branch isn't strictly necessary (the kw_try case below would
  1095. // do this too after the tok::at is parsed above). But be explicit.
  1096. parseTryCatch();
  1097. return;
  1098. default:
  1099. break;
  1100. }
  1101. break;
  1102. case tok::kw_enum:
  1103. // Ignore if this is part of "template <enum ...".
  1104. if (Previous && Previous->is(tok::less)) {
  1105. nextToken();
  1106. break;
  1107. }
  1108. // parseEnum falls through and does not yet add an unwrapped line as an
  1109. // enum definition can start a structural element.
  1110. if (!parseEnum())
  1111. break;
  1112. // This only applies for C++.
  1113. if (!Style.isCpp()) {
  1114. addUnwrappedLine();
  1115. return;
  1116. }
  1117. break;
  1118. case tok::kw_typedef:
  1119. nextToken();
  // `typedef NS_ENUM(...)` and the related macros are parsed as enums.
  1120. if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
  1121. Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
  1122. Keywords.kw_CF_CLOSED_ENUM,
  1123. Keywords.kw_NS_CLOSED_ENUM))
  1124. parseEnum();
  1125. break;
  1126. case tok::kw_struct:
  1127. case tok::kw_union:
  1128. case tok::kw_class:
  1129. // parseRecord falls through and does not yet add an unwrapped line as a
  1130. // record declaration or definition can start a structural element.
  1131. parseRecord();
  1132. // This does not apply for Java, JavaScript and C#.
  1133. if (Style.Language == FormatStyle::LK_Java ||
  1134. Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
  1135. if (FormatTok->is(tok::semi))
  1136. nextToken();
  1137. addUnwrappedLine();
  1138. return;
  1139. }
  1140. break;
  1141. case tok::period:
  1142. nextToken();
  1143. // In Java, classes have an implicit static member "class".
  1144. if (Style.Language == FormatStyle::LK_Java && FormatTok &&
  1145. FormatTok->is(tok::kw_class))
  1146. nextToken();
  1147. if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
  1148. FormatTok->Tok.getIdentifierInfo())
  1149. // JavaScript only has pseudo keywords, all keywords are allowed to
  1150. // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
  1151. nextToken();
  1152. break;
  1153. case tok::semi:
  1154. nextToken();
  1155. addUnwrappedLine();
  1156. return;
  1157. case tok::r_brace:
  // The closing brace is not consumed here; the line ends before it.
  1158. addUnwrappedLine();
  1159. return;
  1160. case tok::l_paren:
  1161. parseParens();
  1162. break;
  1163. case tok::kw_operator:
  1164. nextToken();
  1165. if (FormatTok->isBinaryOperator())
  1166. nextToken();
  1167. break;
  1168. case tok::caret:
  // '^' optionally followed by a type or identifier, a parameter list and
  // a braced body that is parsed as a child block.
  1169. nextToken();
  1170. if (FormatTok->Tok.isAnyIdentifier() ||
  1171. FormatTok->isSimpleTypeSpecifier())
  1172. nextToken();
  1173. if (FormatTok->is(tok::l_paren))
  1174. parseParens();
  1175. if (FormatTok->is(tok::l_brace))
  1176. parseChildBlock();
  1177. break;
  1178. case tok::l_brace:
  1179. if (!tryToParseBracedList()) {
  1180. // A block outside of parentheses must be the last part of a
  1181. // structural element.
  1182. // FIXME: Figure out cases where this is not true, and add projections
  1183. // for them (the one we know is missing are lambdas).
  1184. if (Style.BraceWrapping.AfterFunction)
  1185. addUnwrappedLine();
  1186. FormatTok->Type = TT_FunctionLBrace;
  1187. parseBlock(/*MustBeDeclaration=*/false);
  1188. addUnwrappedLine();
  1189. return;
  1190. }
  1191. // Otherwise this was a braced init list, and the structural
  1192. // element continues.
  1193. break;
  1194. case tok::kw_try:
  1195. // We arrive here when parsing function-try blocks.
  1196. if (Style.BraceWrapping.AfterFunction)
  1197. addUnwrappedLine();
  1198. parseTryCatch();
  1199. return;
  1200. case tok::identifier: {
  1201. if (FormatTok->is(TT_MacroBlockEnd)) {
  1202. addUnwrappedLine();
  1203. return;
  1204. }
  1205. // Function declarations (as opposed to function expressions) are parsed
  1206. // on their own unwrapped line by continuing this loop. Function
  1207. // expressions (functions that are not on their own line) must not create
  1208. // a new unwrapped line, so they are special cased below.
  1209. size_t TokenCount = Line->Tokens.size();
  1210. if (Style.Language == FormatStyle::LK_JavaScript &&
  1211. FormatTok->is(Keywords.kw_function) &&
  1212. (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
  1213. Keywords.kw_async)))) {
  1214. tryToParseJSFunction();
  1215. break;
  1216. }
  1217. if ((Style.Language == FormatStyle::LK_JavaScript ||
  1218. Style.Language == FormatStyle::LK_Java) &&
  1219. FormatTok->is(Keywords.kw_interface)) {
  1220. if (Style.Language == FormatStyle::LK_JavaScript) {
  1221. // In JavaScript/TypeScript, "interface" can be used as a standalone
  1222. // identifier, e.g. in `var interface = 1;`. If "interface" is
  1223. // followed by another identifier, it is very like to be an actual
  1224. // interface declaration.
  // Peek at the next token, then restore the token position.
  1225. unsigned StoredPosition = Tokens->getPosition();
  1226. FormatToken *Next = Tokens->getNextToken();
  1227. FormatTok = Tokens->setPosition(StoredPosition);
  1228. if (Next && !mustBeJSIdent(Keywords, Next)) {
  1229. nextToken();
  1230. break;
  1231. }
  1232. }
  1233. parseRecord();
  1234. addUnwrappedLine();
  1235. return;
  1236. }
  1237. if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
  1238. parseStatementMacro();
  1239. return;
  1240. }
  1241. // See if the following token should start a new unwrapped line.
  1242. StringRef Text = FormatTok->TokenText;
  1243. nextToken();
  1244. // JS doesn't have macros, and within classes colons indicate fields, not
  1245. // labels.
  1246. if (Style.Language == FormatStyle::LK_JavaScript)
  1247. break;
  // The label/macro heuristics below only apply when the identifier is the
  // first token of the line (optionally preceded by a single comment).
  1248. TokenCount = Line->Tokens.size();
  1249. if (TokenCount == 1 ||
  1250. (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
  1251. if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
  1252. Line->Tokens.begin()->Tok->MustBreakBefore = true;
  1253. parseLabel();
  1254. return;
  1255. }
  1256. // Recognize function-like macro usages without trailing semicolon as
  1257. // well as free-standing macros like Q_OBJECT.
  1258. bool FunctionLike = FormatTok->is(tok::l_paren);
  1259. if (FunctionLike)
  1260. parseParens();
  1261. bool FollowedByNewline =
  1262. CommentsBeforeNextToken.empty()
  1263. ? FormatTok->NewlinesBefore > 0
  1264. : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
  // Treated as a macro only if the name is all upper case and either at
  // least five characters long or followed by a parenthesised list.
  1265. if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
  1266. tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
  1267. addUnwrappedLine();
  1268. return;
  1269. }
  1270. }
  1271. break;
  1272. }
  1273. case tok::equal:
  1274. // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
  1275. // TT_JsFatArrow. The always start an expression or a child block if
  1276. // followed by a curly.
  1277. if (FormatTok->is(TT_JsFatArrow)) {
  1278. nextToken();
  1279. if (FormatTok->is(tok::l_brace))
  1280. parseChildBlock();
  1281. break;
  1282. }
  1283. nextToken();
  1284. if (FormatTok->Tok.is(tok::l_brace)) {
  1285. nextToken();
  1286. parseBracedList();
  1287. } else if (Style.Language == FormatStyle::LK_Proto &&
  1288. FormatTok->Tok.is(tok::less)) {
  1289. nextToken();
  1290. parseBracedList(/*ContinueOnSemicolons=*/false,
  1291. /*ClosingBraceKind=*/tok::greater);
  1292. }
  1293. break;
  1294. case tok::l_square:
  1295. parseSquare();
  1296. break;
  1297. case tok::kw_new:
  1298. parseNew();
  1299. break;
  1300. default:
  1301. nextToken();
  1302. break;
  1303. }
  1304. } while (!eof());
  1305. }
  // Tries to parse a C++ lambda expression starting at the current '['.
  //
  // Returns false if the style is not C++ (after consuming one token) or if
  // tryToParseLambdaIntroducer() rejects the introducer. Otherwise returns
  // true: either after the body has been parsed as a child block and the '['
  // and '{' were tagged TT_LambdaLSquare / TT_LambdaLBrace, or early, without
  // tagging them, when a token is met that does not fit between the introducer
  // and the body.
  1306. bool UnwrappedLineParser::tryToParseLambda() {
  1307. if (!Style.isCpp()) {
  1308. nextToken();
  1309. return false;
  1310. }
  1311. assert(FormatTok->is(tok::l_square));
  // Keep a reference to the '[' so it can be tagged once the body is found.
  1312. FormatToken &LSquare = *FormatTok;
  1313. if (!tryToParseLambdaIntroducer())
  1314. return false;
  1315. bool SeenArrow = false;
  // Skip over everything that may appear between the introducer and the body.
  1316. while (FormatTok->isNot(tok::l_brace)) {
  1317. if (FormatTok->isSimpleTypeSpecifier()) {
  1318. nextToken();
  1319. continue;
  1320. }
  1321. switch (FormatTok->Tok.getKind()) {
  1322. case tok::l_brace:
  1323. break;
  1324. case tok::l_paren:
  1325. parseParens();
  1326. break;
  1327. case tok::amp:
  1328. case tok::star:
  1329. case tok::kw_const:
  1330. case tok::comma:
  1331. case tok::less:
  1332. case tok::greater:
  1333. case tok::identifier:
  1334. case tok::numeric_constant:
  1335. case tok::coloncolon:
  1336. case tok::kw_mutable:
  1337. case tok::kw_noexcept:
  1338. nextToken();
  1339. break;
  1340. // Specialization of a template with an integer parameter can contain
  1341. // arithmetic, logical, comparison and ternary operators.
  1342. //
  1343. // FIXME: This also accepts sequences of operators that are not in the scope
  1344. // of a template argument list.
  1345. //
  1346. // In a C++ lambda a template type can only occur after an arrow. We use
  1347. // this as an heuristic to distinguish between Objective-C expressions
  1348. // followed by an `a->b` expression, such as:
  1349. // ([obj func:arg] + a->b)
  1350. // Otherwise the code below would parse as a lambda.
  1351. case tok::plus:
  1352. case tok::minus:
  1353. case tok::exclaim:
  1354. case tok::tilde:
  1355. case tok::slash:
  1356. case tok::percent:
  1357. case tok::lessless:
  1358. case tok::pipe:
  1359. case tok::pipepipe:
  1360. case tok::ampamp:
  1361. case tok::caret:
  1362. case tok::equalequal:
  1363. case tok::exclaimequal:
  1364. case tok::greaterequal:
  1365. case tok::lessequal:
  1366. case tok::question:
  1367. case tok::colon:
  1368. case tok::kw_true:
  1369. case tok::kw_false:
  // These tokens are only skipped after an arrow has been seen; before
  // that, parsing stops here.
  1370. if (SeenArrow) {
  1371. nextToken();
  1372. break;
  1373. }
  1374. return true;
  1375. case tok::arrow:
  1376. // This might or might not actually be a lambda arrow (this could be an
  1377. // ObjC method invocation followed by a dereferencing arrow). We might
  1378. // reset this back to TT_Unknown in TokenAnnotator.
  1379. FormatTok->Type = TT_LambdaArrow;
  1380. SeenArrow = true;
  1381. nextToken();
  1382. break;
  1383. default:
  1384. return true;
  1385. }
  1386. }
  // The loop ended on '{': tag the brace and the introducer, then parse the
  // body as a child block.
  1387. FormatTok->Type = TT_LambdaLBrace;
  1388. LSquare.Type = TT_LambdaLSquare;
  1389. parseChildBlock();
  1390. return true;
  1391. }
  1392. bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  1393. const FormatToken *Previous = FormatTok->Previous;
  1394. if (Previous &&
  1395. (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
  1396. tok::kw_delete, tok::l_square) ||
  1397. FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
  1398. Previous->isSimpleTypeSpecifier())) {
  1399. nextToken();
  1400. return false;
  1401. }
  1402. nextToken();
  1403. if (FormatTok->is(tok::l_square)) {
  1404. return false;
  1405. }
  1406. parseSquare(/*LambdaIntroducer=*/true);
  1407. return true;
  1408. }
  1409. void UnwrappedLineParser::tryToParseJSFunction() {
  1410. assert(FormatTok->is(Keywords.kw_function) ||
  1411. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
  1412. if (FormatTok->is(Keywords.kw_async))
  1413. nextToken();
  1414. // Consume "function".
  1415. nextToken();
  1416. // Consume * (generator function). Treat it like C++'s overloaded operators.
  1417. if (FormatTok->is(tok::star)) {
  1418. FormatTok->Type = TT_OverloadedOperator;
  1419. nextToken();
  1420. }
  1421. // Consume function name.
  1422. if (FormatTok->is(tok::identifier))
  1423. nextToken();
  1424. if (FormatTok->isNot(tok::l_paren))
  1425. return;
  1426. // Parse formal parameter list.
  1427. parseParens();
  1428. if (FormatTok->is(tok::colon)) {
  1429. // Parse a type definition.
  1430. nextToken();
  1431. // Eat the type declaration. For braced inline object types, balance braces,
  1432. // otherwise just parse until finding an l_brace for the function body.
  1433. if (FormatTok->is(tok::l_brace))
  1434. tryToParseBracedList();
  1435. else
  1436. while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
  1437. nextToken();
  1438. }
  1439. if (FormatTok->is(tok::semi))
  1440. return;
  1441. parseChildBlock();
  1442. }
  1443. bool UnwrappedLineParser::tryToParseBracedList() {
  1444. if (FormatTok->BlockKind == BK_Unknown)
  1445. calculateBraceTypes();
  1446. assert(FormatTok->BlockKind != BK_Unknown);
  1447. if (FormatTok->BlockKind == BK_Block)
  1448. return false;
  1449. nextToken();
  1450. parseBracedList();
  1451. return true;
  1452. }
  1453. bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
  1454. tok::TokenKind ClosingBraceKind) {
  1455. bool HasError = false;
  1456. // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  1457. // replace this by using parseAssigmentExpression() inside.
  1458. do {
  1459. if (Style.Language == FormatStyle::LK_JavaScript) {
  1460. if (FormatTok->is(Keywords.kw_function) ||
  1461. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
  1462. tryToParseJSFunction();
  1463. continue;
  1464. }
  1465. if (FormatTok->is(TT_JsFatArrow)) {
  1466. nextToken();
  1467. // Fat arrows can be followed by simple expressions or by child blocks
  1468. // in curly braces.
  1469. if (FormatTok->is(tok::l_brace)) {
  1470. parseChildBlock();
  1471. continue;
  1472. }
  1473. }
  1474. if (FormatTok->is(tok::l_brace)) {
  1475. // Could be a method inside of a braced list `{a() { return 1; }}`.
  1476. if (tryToParseBracedList())
  1477. continue;
  1478. parseChildBlock();
  1479. }
  1480. }
  1481. if (FormatTok->Tok.getKind() == ClosingBraceKind) {
  1482. nextToken();
  1483. return !HasError;
  1484. }
  1485. switch (FormatTok->Tok.getKind()) {
  1486. case tok::caret:
  1487. nextToken();
  1488. if (FormatTok->is(tok::l_brace)) {
  1489. parseChildBlock();
  1490. }
  1491. break;
  1492. case tok::l_square:
  1493. tryToParseLambda();
  1494. break;
  1495. case tok::l_paren:
  1496. parseParens();
  1497. // JavaScript can just have free standing methods and getters/setters in
  1498. // object literals. Detect them by a "{" following ")".
  1499. if (Style.Language == FormatStyle::LK_JavaScript) {
  1500. if (FormatTok->is(tok::l_brace))
  1501. parseChildBlock();
  1502. break;
  1503. }
  1504. break;
  1505. case tok::l_brace:
  1506. // Assume there are no blocks inside a braced init list apart
  1507. // from the ones we explicitly parse out (like lambdas).
  1508. FormatTok->BlockKind = BK_BracedInit;
  1509. nextToken();
  1510. parseBracedList();
  1511. break;
  1512. case tok::less:
  1513. if (Style.Language == FormatStyle::LK_Proto) {
  1514. nextToken();
  1515. parseBracedList(/*ContinueOnSemicolons=*/false,
  1516. /*ClosingBraceKind=*/tok::greater);
  1517. } else {
  1518. nextToken();
  1519. }
  1520. break;
  1521. case tok::semi:
  1522. // JavaScript (or more precisely TypeScript) can have semicolons in braced
  1523. // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
  1524. // used for error recovery if we have otherwise determined that this is
  1525. // a braced list.
  1526. if (Style.Language == FormatStyle::LK_JavaScript) {
  1527. nextToken();
  1528. break;
  1529. }
  1530. HasError = true;
  1531. if (!ContinueOnSemicolons)
  1532. return !HasError;
  1533. nextToken();
  1534. break;
  1535. case tok::comma:
  1536. nextToken();
  1537. break;
  1538. default:
  1539. nextToken();
  1540. break;
  1541. }
  1542. } while (!eof());
  1543. return false;
  1544. }
  1545. void UnwrappedLineParser::parseParens() {
  1546. assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  1547. nextToken();
  1548. do {
  1549. switch (FormatTok->Tok.getKind()) {
  1550. case tok::l_paren:
  1551. parseParens();
  1552. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
  1553. parseChildBlock();
  1554. break;
  1555. case tok::r_paren:
  1556. nextToken();
  1557. return;
  1558. case tok::r_brace:
  1559. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  1560. return;
  1561. case tok::l_square:
  1562. tryToParseLambda();
  1563. break;
  1564. case tok::l_brace:
  1565. if (!tryToParseBracedList())
  1566. parseChildBlock();
  1567. break;
  1568. case tok::at:
  1569. nextToken();
  1570. if (FormatTok->Tok.is(tok::l_brace)) {
  1571. nextToken();
  1572. parseBracedList();
  1573. }
  1574. break;
  1575. case tok::kw_class:
  1576. if (Style.Language == FormatStyle::LK_JavaScript)
  1577. parseRecord(/*ParseAsExpr=*/true);
  1578. else
  1579. nextToken();
  1580. break;
  1581. case tok::identifier:
  1582. if (Style.Language == FormatStyle::LK_JavaScript &&
  1583. (FormatTok->is(Keywords.kw_function) ||
  1584. FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
  1585. tryToParseJSFunction();
  1586. else
  1587. nextToken();
  1588. break;
  1589. default:
  1590. nextToken();
  1591. break;
  1592. }
  1593. } while (!eof());
  1594. }
  1595. void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  1596. if (!LambdaIntroducer) {
  1597. assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
  1598. if (tryToParseLambda())
  1599. return;
  1600. }
  1601. do {
  1602. switch (FormatTok->Tok.getKind()) {
  1603. case tok::l_paren:
  1604. parseParens();
  1605. break;
  1606. case tok::r_square:
  1607. nextToken();
  1608. return;
  1609. case tok::r_brace:
  1610. // A "}" inside parenthesis is an error if there wasn't a matching "{".
  1611. return;
  1612. case tok::l_square:
  1613. parseSquare();
  1614. break;
  1615. case tok::l_brace: {
  1616. if (!tryToParseBracedList())
  1617. parseChildBlock();
  1618. break;
  1619. }
  1620. case tok::at:
  1621. nextToken();
  1622. if (FormatTok->Tok.is(tok::l_brace)) {
  1623. nextToken();
  1624. parseBracedList();
  1625. }
  1626. break;
  1627. default:
  1628. nextToken();
  1629. break;
  1630. }
  1631. } while (!eof());
  1632. }
  1633. void UnwrappedLineParser::parseIfThenElse() {
  1634. assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  1635. nextToken();
  1636. if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
  1637. nextToken();
  1638. if (FormatTok->Tok.is(tok::l_paren))
  1639. parseParens();
  1640. bool NeedsUnwrappedLine = false;
  1641. if (FormatTok->Tok.is(tok::l_brace)) {
  1642. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1643. parseBlock(/*MustBeDeclaration=*/false);
  1644. if (Style.BraceWrapping.BeforeElse)
  1645. addUnwrappedLine();
  1646. else
  1647. NeedsUnwrappedLine = true;
  1648. } else {
  1649. addUnwrappedLine();
  1650. ++Line->Level;
  1651. parseStructuralElement();
  1652. --Line->Level;
  1653. }
  1654. if (FormatTok->Tok.is(tok::kw_else)) {
  1655. nextToken();
  1656. if (FormatTok->Tok.is(tok::l_brace)) {
  1657. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1658. parseBlock(/*MustBeDeclaration=*/false);
  1659. addUnwrappedLine();
  1660. } else if (FormatTok->Tok.is(tok::kw_if)) {
  1661. parseIfThenElse();
  1662. } else {
  1663. addUnwrappedLine();
  1664. ++Line->Level;
  1665. parseStructuralElement();
  1666. if (FormatTok->is(tok::eof))
  1667. addUnwrappedLine();
  1668. --Line->Level;
  1669. }
  1670. } else if (NeedsUnwrappedLine) {
  1671. addUnwrappedLine();
  1672. }
  1673. }
  1674. void UnwrappedLineParser::parseTryCatch() {
  1675. assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  1676. nextToken();
  1677. bool NeedsUnwrappedLine = false;
  1678. if (FormatTok->is(tok::colon)) {
  1679. // We are in a function try block, what comes is an initializer list.
  1680. nextToken();
  1681. while (FormatTok->is(tok::identifier)) {
  1682. nextToken();
  1683. if (FormatTok->is(tok::l_paren))
  1684. parseParens();
  1685. if (FormatTok->is(tok::comma))
  1686. nextToken();
  1687. }
  1688. }
  1689. // Parse try with resource.
  1690. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
  1691. parseParens();
  1692. }
  1693. if (FormatTok->is(tok::l_brace)) {
  1694. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1695. parseBlock(/*MustBeDeclaration=*/false);
  1696. if (Style.BraceWrapping.BeforeCatch) {
  1697. addUnwrappedLine();
  1698. } else {
  1699. NeedsUnwrappedLine = true;
  1700. }
  1701. } else if (!FormatTok->is(tok::kw_catch)) {
  1702. // The C++ standard requires a compound-statement after a try.
  1703. // If there's none, we try to assume there's a structuralElement
  1704. // and try to continue.
  1705. addUnwrappedLine();
  1706. ++Line->Level;
  1707. parseStructuralElement();
  1708. --Line->Level;
  1709. }
  1710. while (1) {
  1711. if (FormatTok->is(tok::at))
  1712. nextToken();
  1713. if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
  1714. tok::kw___finally) ||
  1715. ((Style.Language == FormatStyle::LK_Java ||
  1716. Style.Language == FormatStyle::LK_JavaScript) &&
  1717. FormatTok->is(Keywords.kw_finally)) ||
  1718. (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
  1719. FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
  1720. break;
  1721. nextToken();
  1722. while (FormatTok->isNot(tok::l_brace)) {
  1723. if (FormatTok->is(tok::l_paren)) {
  1724. parseParens();
  1725. continue;
  1726. }
  1727. if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
  1728. return;
  1729. nextToken();
  1730. }
  1731. NeedsUnwrappedLine = false;
  1732. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1733. parseBlock(/*MustBeDeclaration=*/false);
  1734. if (Style.BraceWrapping.BeforeCatch)
  1735. addUnwrappedLine();
  1736. else
  1737. NeedsUnwrappedLine = true;
  1738. }
  1739. if (NeedsUnwrappedLine)
  1740. addUnwrappedLine();
  1741. }
  1742. void UnwrappedLineParser::parseNamespace() {
  1743. assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
  1744. "'namespace' expected");
  1745. const FormatToken &InitialToken = *FormatTok;
  1746. nextToken();
  1747. if (InitialToken.is(TT_NamespaceMacro)) {
  1748. parseParens();
  1749. } else {
  1750. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
  1751. tok::l_square)) {
  1752. if (FormatTok->is(tok::l_square))
  1753. parseSquare();
  1754. else
  1755. nextToken();
  1756. }
  1757. }
  1758. if (FormatTok->Tok.is(tok::l_brace)) {
  1759. if (ShouldBreakBeforeBrace(Style, InitialToken))
  1760. addUnwrappedLine();
  1761. bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
  1762. (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
  1763. DeclarationScopeStack.size() > 1);
  1764. parseBlock(/*MustBeDeclaration=*/true, AddLevel);
  1765. // Munch the semicolon after a namespace. This is more common than one would
  1766. // think. Puttin the semicolon into its own line is very ugly.
  1767. if (FormatTok->Tok.is(tok::semi))
  1768. nextToken();
  1769. addUnwrappedLine();
  1770. }
  1771. // FIXME: Add error handling.
  1772. }
  1773. void UnwrappedLineParser::parseNew() {
  1774. assert(FormatTok->is(tok::kw_new) && "'new' expected");
  1775. nextToken();
  1776. if (Style.Language != FormatStyle::LK_Java)
  1777. return;
  1778. // In Java, we can parse everything up to the parens, which aren't optional.
  1779. do {
  1780. // There should not be a ;, { or } before the new's open paren.
  1781. if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
  1782. return;
  1783. // Consume the parens.
  1784. if (FormatTok->is(tok::l_paren)) {
  1785. parseParens();
  1786. // If there is a class body of an anonymous class, consume that as child.
  1787. if (FormatTok->is(tok::l_brace))
  1788. parseChildBlock();
  1789. return;
  1790. }
  1791. nextToken();
  1792. } while (!eof());
  1793. }
  1794. void UnwrappedLineParser::parseForOrWhileLoop() {
  1795. assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
  1796. "'for', 'while' or foreach macro expected");
  1797. nextToken();
  1798. // JS' for await ( ...
  1799. if (Style.Language == FormatStyle::LK_JavaScript &&
  1800. FormatTok->is(Keywords.kw_await))
  1801. nextToken();
  1802. if (FormatTok->Tok.is(tok::l_paren))
  1803. parseParens();
  1804. if (FormatTok->Tok.is(tok::l_brace)) {
  1805. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1806. parseBlock(/*MustBeDeclaration=*/false);
  1807. addUnwrappedLine();
  1808. } else {
  1809. addUnwrappedLine();
  1810. ++Line->Level;
  1811. parseStructuralElement();
  1812. --Line->Level;
  1813. }
  1814. }
  1815. void UnwrappedLineParser::parseDoWhile() {
  1816. assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
  1817. nextToken();
  1818. if (FormatTok->Tok.is(tok::l_brace)) {
  1819. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1820. parseBlock(/*MustBeDeclaration=*/false);
  1821. if (Style.BraceWrapping.IndentBraces)
  1822. addUnwrappedLine();
  1823. } else {
  1824. addUnwrappedLine();
  1825. ++Line->Level;
  1826. parseStructuralElement();
  1827. --Line->Level;
  1828. }
  1829. // FIXME: Add error handling.
  1830. if (!FormatTok->Tok.is(tok::kw_while)) {
  1831. addUnwrappedLine();
  1832. return;
  1833. }
  1834. nextToken();
  1835. parseStructuralElement();
  1836. }
  1837. void UnwrappedLineParser::parseLabel() {
  1838. nextToken();
  1839. unsigned OldLineLevel = Line->Level;
  1840. if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
  1841. --Line->Level;
  1842. if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
  1843. CompoundStatementIndenter Indenter(this, Line->Level,
  1844. Style.BraceWrapping.AfterCaseLabel,
  1845. Style.BraceWrapping.IndentBraces);
  1846. parseBlock(/*MustBeDeclaration=*/false);
  1847. if (FormatTok->Tok.is(tok::kw_break)) {
  1848. if (Style.BraceWrapping.AfterControlStatement)
  1849. addUnwrappedLine();
  1850. parseStructuralElement();
  1851. }
  1852. addUnwrappedLine();
  1853. } else {
  1854. if (FormatTok->is(tok::semi))
  1855. nextToken();
  1856. addUnwrappedLine();
  1857. }
  1858. Line->Level = OldLineLevel;
  1859. if (FormatTok->isNot(tok::l_brace)) {
  1860. parseStructuralElement();
  1861. addUnwrappedLine();
  1862. }
  1863. }
  1864. void UnwrappedLineParser::parseCaseLabel() {
  1865. assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
  1866. // FIXME: fix handling of complex expressions here.
  1867. do {
  1868. nextToken();
  1869. } while (!eof() && !FormatTok->Tok.is(tok::colon));
  1870. parseLabel();
  1871. }
  1872. void UnwrappedLineParser::parseSwitch() {
  1873. assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
  1874. nextToken();
  1875. if (FormatTok->Tok.is(tok::l_paren))
  1876. parseParens();
  1877. if (FormatTok->Tok.is(tok::l_brace)) {
  1878. CompoundStatementIndenter Indenter(this, Style, Line->Level);
  1879. parseBlock(/*MustBeDeclaration=*/false);
  1880. addUnwrappedLine();
  1881. } else {
  1882. addUnwrappedLine();
  1883. ++Line->Level;
  1884. parseStructuralElement();
  1885. --Line->Level;
  1886. }
  1887. }
  1888. void UnwrappedLineParser::parseAccessSpecifier() {
  1889. nextToken();
  1890. // Understand Qt's slots.
  1891. if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
  1892. nextToken();
  1893. // Otherwise, we don't know what it is, and we'd better keep the next token.
  1894. if (FormatTok->Tok.is(tok::colon))
  1895. nextToken();
  1896. addUnwrappedLine();
  1897. }
  1898. bool UnwrappedLineParser::parseEnum() {
  1899. // Won't be 'enum' for NS_ENUMs.
  1900. if (FormatTok->Tok.is(tok::kw_enum))
  1901. nextToken();
  1902. // In TypeScript, "enum" can also be used as property name, e.g. in interface
  1903. // declarations. An "enum" keyword followed by a colon would be a syntax
  1904. // error and thus assume it is just an identifier.
  1905. if (Style.Language == FormatStyle::LK_JavaScript &&
  1906. FormatTok->isOneOf(tok::colon, tok::question))
  1907. return false;
  1908. // In protobuf, "enum" can be used as a field name.
  1909. if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
  1910. return false;
  1911. // Eat up enum class ...
  1912. if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
  1913. nextToken();
  1914. while (FormatTok->Tok.getIdentifierInfo() ||
  1915. FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
  1916. tok::greater, tok::comma, tok::question)) {
  1917. nextToken();
  1918. // We can have macros or attributes in between 'enum' and the enum name.
  1919. if (FormatTok->is(tok::l_paren))
  1920. parseParens();
  1921. if (FormatTok->is(tok::identifier)) {
  1922. nextToken();
  1923. // If there are two identifiers in a row, this is likely an elaborate
  1924. // return type. In Java, this can be "implements", etc.
  1925. if (Style.isCpp() && FormatTok->is(tok::identifier))
  1926. return false;
  1927. }
  1928. }
  1929. // Just a declaration or something is wrong.
  1930. if (FormatTok->isNot(tok::l_brace))
  1931. return true;
  1932. FormatTok->BlockKind = BK_Block;
  1933. if (Style.Language == FormatStyle::LK_Java) {
  1934. // Java enums are different.
  1935. parseJavaEnumBody();
  1936. return true;
  1937. }
  1938. if (Style.Language == FormatStyle::LK_Proto) {
  1939. parseBlock(/*MustBeDeclaration=*/true);
  1940. return true;
  1941. }
  1942. // Parse enum body.
  1943. nextToken();
  1944. bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
  1945. if (HasError) {
  1946. if (FormatTok->is(tok::semi))
  1947. nextToken();
  1948. addUnwrappedLine();
  1949. }
  1950. return true;
  1951. // There is no addUnwrappedLine() here so that we fall through to parsing a
  1952. // structural element afterwards. Thus, in "enum A {} n, m;",
  1953. // "} n, m;" will end up in one unwrapped line.
  1954. }
  1955. void UnwrappedLineParser::parseJavaEnumBody() {
  1956. // Determine whether the enum is simple, i.e. does not have a semicolon or
  1957. // constants with class bodies. Simple enums can be formatted like braced
  1958. // lists, contracted to a single line, etc.
  1959. unsigned StoredPosition = Tokens->getPosition();
  1960. bool IsSimple = true;
  1961. FormatToken *Tok = Tokens->getNextToken();
  1962. while (Tok) {
  1963. if (Tok->is(tok::r_brace))
  1964. break;
  1965. if (Tok->isOneOf(tok::l_brace, tok::semi)) {
  1966. IsSimple = false;
  1967. break;
  1968. }
  1969. // FIXME: This will also mark enums with braces in the arguments to enum
  1970. // constants as "not simple". This is probably fine in practice, though.
  1971. Tok = Tokens->getNextToken();
  1972. }
  1973. FormatTok = Tokens->setPosition(StoredPosition);
  1974. if (IsSimple) {
  1975. nextToken();
  1976. parseBracedList();
  1977. addUnwrappedLine();
  1978. return;
  1979. }
  1980. // Parse the body of a more complex enum.
  1981. // First add a line for everything up to the "{".
  1982. nextToken();
  1983. addUnwrappedLine();
  1984. ++Line->Level;
  1985. // Parse the enum constants.
  1986. while (FormatTok) {
  1987. if (FormatTok->is(tok::l_brace)) {
  1988. // Parse the constant's class body.
  1989. parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
  1990. /*MunchSemi=*/false);
  1991. } else if (FormatTok->is(tok::l_paren)) {
  1992. parseParens();
  1993. } else if (FormatTok->is(tok::comma)) {
  1994. nextToken();
  1995. addUnwrappedLine();
  1996. } else if (FormatTok->is(tok::semi)) {
  1997. nextToken();
  1998. addUnwrappedLine();
  1999. break;
  2000. } else if (FormatTok->is(tok::r_brace)) {
  2001. addUnwrappedLine();
  2002. break;
  2003. } else {
  2004. nextToken();
  2005. }
  2006. }
  2007. // Parse the class body after the enum's ";" if any.
  2008. parseLevel(/*HasOpeningBrace=*/true);
  2009. nextToken();
  2010. --Line->Level;
  2011. addUnwrappedLine();
  2012. }
  2013. void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  2014. const FormatToken &InitialToken = *FormatTok;
  2015. nextToken();
  2016. // The actual identifier can be a nested name specifier, and in macros
  2017. // it is often token-pasted.
  2018. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
  2019. tok::kw___attribute, tok::kw___declspec,
  2020. tok::kw_alignas) ||
  2021. ((Style.Language == FormatStyle::LK_Java ||
  2022. Style.Language == FormatStyle::LK_JavaScript) &&
  2023. FormatTok->isOneOf(tok::period, tok::comma))) {
  2024. if (Style.Language == FormatStyle::LK_JavaScript &&
  2025. FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
  2026. // JavaScript/TypeScript supports inline object types in
  2027. // extends/implements positions:
  2028. // class Foo implements {bar: number} { }
  2029. nextToken();
  2030. if (FormatTok->is(tok::l_brace)) {
  2031. tryToParseBracedList();
  2032. continue;
  2033. }
  2034. }
  2035. bool IsNonMacroIdentifier =
  2036. FormatTok->is(tok::identifier) &&
  2037. FormatTok->TokenText != FormatTok->TokenText.upper();
  2038. nextToken();
  2039. // We can have macros or attributes in between 'class' and the class name.
  2040. if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
  2041. parseParens();
  2042. }
  2043. // Note that parsing away template declarations here leads to incorrectly
  2044. // accepting function declarations as record declarations.
  2045. // In general, we cannot solve this problem. Consider:
  2046. // class A<int> B() {}
  2047. // which can be a function definition or a class definition when B() is a
  2048. // macro. If we find enough real-world cases where this is a problem, we
  2049. // can parse for the 'template' keyword in the beginning of the statement,
  2050. // and thus rule out the record production in case there is no template
  2051. // (this would still leave us with an ambiguity between template function
  2052. // and class declarations).
  2053. if (FormatTok->isOneOf(tok::colon, tok::less)) {
  2054. while (!eof()) {
  2055. if (FormatTok->is(tok::l_brace)) {
  2056. calculateBraceTypes(/*ExpectClassBody=*/true);
  2057. if (!tryToParseBracedList())
  2058. break;
  2059. }
  2060. if (FormatTok->Tok.is(tok::semi))
  2061. return;
  2062. nextToken();
  2063. }
  2064. }
  2065. if (FormatTok->Tok.is(tok::l_brace)) {
  2066. if (ParseAsExpr) {
  2067. parseChildBlock();
  2068. } else {
  2069. if (ShouldBreakBeforeBrace(Style, InitialToken))
  2070. addUnwrappedLine();
  2071. parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
  2072. /*MunchSemi=*/false);
  2073. }
  2074. }
  2075. // There is no addUnwrappedLine() here so that we fall through to parsing a
  2076. // structural element afterwards. Thus, in "class A {} n, m;",
  2077. // "} n, m;" will end up in one unwrapped line.
  2078. }
  2079. void UnwrappedLineParser::parseObjCMethod() {
  2080. assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
  2081. "'(' or identifier expected.");
  2082. do {
  2083. if (FormatTok->Tok.is(tok::semi)) {
  2084. nextToken();
  2085. addUnwrappedLine();
  2086. return;
  2087. } else if (FormatTok->Tok.is(tok::l_brace)) {
  2088. if (Style.BraceWrapping.AfterFunction)
  2089. addUnwrappedLine();
  2090. parseBlock(/*MustBeDeclaration=*/false);
  2091. addUnwrappedLine();
  2092. return;
  2093. } else {
  2094. nextToken();
  2095. }
  2096. } while (!eof());
  2097. }
  2098. void UnwrappedLineParser::parseObjCProtocolList() {
  2099. assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
  2100. do {
  2101. nextToken();
  2102. // Early exit in case someone forgot a close angle.
  2103. if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
  2104. FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
  2105. return;
  2106. } while (!eof() && FormatTok->Tok.isNot(tok::greater));
  2107. nextToken(); // Skip '>'.
  2108. }
  2109. void UnwrappedLineParser::parseObjCUntilAtEnd() {
  2110. do {
  2111. if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
  2112. nextToken();
  2113. addUnwrappedLine();
  2114. break;
  2115. }
  2116. if (FormatTok->is(tok::l_brace)) {
  2117. parseBlock(/*MustBeDeclaration=*/false);
  2118. // In ObjC interfaces, nothing should be following the "}".
  2119. addUnwrappedLine();
  2120. } else if (FormatTok->is(tok::r_brace)) {
  2121. // Ignore stray "}". parseStructuralElement doesn't consume them.
  2122. nextToken();
  2123. addUnwrappedLine();
  2124. } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
  2125. nextToken();
  2126. parseObjCMethod();
  2127. } else {
  2128. parseStructuralElement();
  2129. }
  2130. } while (!eof());
  2131. }
  2132. void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  2133. assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
  2134. FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
  2135. nextToken();
  2136. nextToken(); // interface name
  2137. // @interface can be followed by a lightweight generic
  2138. // specialization list, then either a base class or a category.
  2139. if (FormatTok->Tok.is(tok::less)) {
  2140. // Unlike protocol lists, generic parameterizations support
  2141. // nested angles:
  2142. //
  2143. // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
  2144. // NSObject <NSCopying, NSSecureCoding>
  2145. //
  2146. // so we need to count how many open angles we have left.
  2147. unsigned NumOpenAngles = 1;
  2148. do {
  2149. nextToken();
  2150. // Early exit in case someone forgot a close angle.
  2151. if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
  2152. FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
  2153. break;
  2154. if (FormatTok->Tok.is(tok::less))
  2155. ++NumOpenAngles;
  2156. else if (FormatTok->Tok.is(tok::greater)) {
  2157. assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
  2158. --NumOpenAngles;
  2159. }
  2160. } while (!eof() && NumOpenAngles != 0);
  2161. nextToken(); // Skip '>'.
  2162. }
  2163. if (FormatTok->Tok.is(tok::colon)) {
  2164. nextToken();
  2165. nextToken(); // base class name
  2166. } else if (FormatTok->Tok.is(tok::l_paren))
  2167. // Skip category, if present.
  2168. parseParens();
  2169. if (FormatTok->Tok.is(tok::less))
  2170. parseObjCProtocolList();
  2171. if (FormatTok->Tok.is(tok::l_brace)) {
  2172. if (Style.BraceWrapping.AfterObjCDeclaration)
  2173. addUnwrappedLine();
  2174. parseBlock(/*MustBeDeclaration=*/true);
  2175. }
  2176. // With instance variables, this puts '}' on its own line. Without instance
  2177. // variables, this ends the @interface line.
  2178. addUnwrappedLine();
  2179. parseObjCUntilAtEnd();
  2180. }
  2181. // Returns true for the declaration/definition form of @protocol,
  2182. // false for the expression form.
  2183. bool UnwrappedLineParser::parseObjCProtocol() {
  2184. assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
  2185. nextToken();
  2186. if (FormatTok->is(tok::l_paren))
  2187. // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
  2188. return false;
  2189. // The definition/declaration form,
  2190. // @protocol Foo
  2191. // - (int)someMethod;
  2192. // @end
  2193. nextToken(); // protocol name
  2194. if (FormatTok->Tok.is(tok::less))
  2195. parseObjCProtocolList();
  2196. // Check for protocol declaration.
  2197. if (FormatTok->Tok.is(tok::semi)) {
  2198. nextToken();
  2199. addUnwrappedLine();
  2200. return true;
  2201. }
  2202. addUnwrappedLine();
  2203. parseObjCUntilAtEnd();
  2204. return true;
  2205. }
  2206. void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
  2207. bool IsImport = FormatTok->is(Keywords.kw_import);
  2208. assert(IsImport || FormatTok->is(tok::kw_export));
  2209. nextToken();
  2210. // Consume the "default" in "export default class/function".
  2211. if (FormatTok->is(tok::kw_default))
  2212. nextToken();
  2213. // Consume "async function", "function" and "default function", so that these
  2214. // get parsed as free-standing JS functions, i.e. do not require a trailing
  2215. // semicolon.
  2216. if (FormatTok->is(Keywords.kw_async))
  2217. nextToken();
  2218. if (FormatTok->is(Keywords.kw_function)) {
  2219. nextToken();
  2220. return;
  2221. }
  2222. // For imports, `export *`, `export {...}`, consume the rest of the line up
  2223. // to the terminating `;`. For everything else, just return and continue
  2224. // parsing the structural element, i.e. the declaration or expression for
  2225. // `export default`.
  2226. if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
  2227. !FormatTok->isStringLiteral())
  2228. return;
  2229. while (!eof()) {
  2230. if (FormatTok->is(tok::semi))
  2231. return;
  2232. if (Line->Tokens.empty()) {
  2233. // Common issue: Automatic Semicolon Insertion wrapped the line, so the
  2234. // import statement should terminate.
  2235. return;
  2236. }
  2237. if (FormatTok->is(tok::l_brace)) {
  2238. FormatTok->BlockKind = BK_Block;
  2239. nextToken();
  2240. parseBracedList();
  2241. } else {
  2242. nextToken();
  2243. }
  2244. }
  2245. }
  2246. void UnwrappedLineParser::parseStatementMacro() {
  2247. nextToken();
  2248. if (FormatTok->is(tok::l_paren))
  2249. parseParens();
  2250. if (FormatTok->is(tok::semi))
  2251. nextToken();
  2252. addUnwrappedLine();
  2253. }
  2254. LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
  2255. StringRef Prefix = "") {
  2256. llvm::dbgs() << Prefix << "Line(" << Line.Level
  2257. << ", FSC=" << Line.FirstStartColumn << ")"
  2258. << (Line.InPPDirective ? " MACRO" : "") << ": ";
  2259. for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  2260. E = Line.Tokens.end();
  2261. I != E; ++I) {
  2262. llvm::dbgs() << I->Tok->Tok.getName() << "["
  2263. << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
  2264. << "] ";
  2265. }
  2266. for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
  2267. E = Line.Tokens.end();
  2268. I != E; ++I) {
  2269. const UnwrappedLineNode &Node = *I;
  2270. for (SmallVectorImpl<UnwrappedLine>::const_iterator
  2271. I = Node.Children.begin(),
  2272. E = Node.Children.end();
  2273. I != E; ++I) {
  2274. printDebugInfo(*I, "\nChild: ");
  2275. }
  2276. }
  2277. llvm::dbgs() << "\n";
  2278. }
  2279. void UnwrappedLineParser::addUnwrappedLine() {
  2280. if (Line->Tokens.empty())
  2281. return;
  2282. LLVM_DEBUG({
  2283. if (CurrentLines == &Lines)
  2284. printDebugInfo(*Line);
  2285. });
  2286. CurrentLines->push_back(std::move(*Line));
  2287. Line->Tokens.clear();
  2288. Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  2289. Line->FirstStartColumn = 0;
  2290. if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
  2291. CurrentLines->append(
  2292. std::make_move_iterator(PreprocessorDirectives.begin()),
  2293. std::make_move_iterator(PreprocessorDirectives.end()));
  2294. PreprocessorDirectives.clear();
  2295. }
  2296. // Disconnect the current token from the last token on the previous line.
  2297. FormatTok->Previous = nullptr;
  2298. }
  2299. bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
  2300. bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
  2301. return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
  2302. FormatTok.NewlinesBefore > 0;
  2303. }
  2304. // Checks if \p FormatTok is a line comment that continues the line comment
  2305. // section on \p Line.
  2306. static bool continuesLineCommentSection(const FormatToken &FormatTok,
  2307. const UnwrappedLine &Line,
  2308. llvm::Regex &CommentPragmasRegex) {
  2309. if (Line.Tokens.empty())
  2310. return false;
  2311. StringRef IndentContent = FormatTok.TokenText;
  2312. if (FormatTok.TokenText.startswith("//") ||
  2313. FormatTok.TokenText.startswith("/*"))
  2314. IndentContent = FormatTok.TokenText.substr(2);
  2315. if (CommentPragmasRegex.match(IndentContent))
  2316. return false;
  2317. // If Line starts with a line comment, then FormatTok continues the comment
  2318. // section if its original column is greater or equal to the original start
  2319. // column of the line.
  2320. //
  2321. // Define the min column token of a line as follows: if a line ends in '{' or
  2322. // contains a '{' followed by a line comment, then the min column token is
  2323. // that '{'. Otherwise, the min column token of the line is the first token of
  2324. // the line.
  2325. //
  2326. // If Line starts with a token other than a line comment, then FormatTok
  2327. // continues the comment section if its original column is greater than the
  2328. // original start column of the min column token of the line.
  2329. //
  2330. // For example, the second line comment continues the first in these cases:
  2331. //
  2332. // // first line
  2333. // // second line
  2334. //
  2335. // and:
  2336. //
  2337. // // first line
  2338. // // second line
  2339. //
  2340. // and:
  2341. //
  2342. // int i; // first line
  2343. // // second line
  2344. //
  2345. // and:
  2346. //
  2347. // do { // first line
  2348. // // second line
  2349. // int i;
  2350. // } while (true);
  2351. //
  2352. // and:
  2353. //
  2354. // enum {
  2355. // a, // first line
  2356. // // second line
  2357. // b
  2358. // };
  2359. //
  2360. // The second line comment doesn't continue the first in these cases:
  2361. //
  2362. // // first line
  2363. // // second line
  2364. //
  2365. // and:
  2366. //
  2367. // int i; // first line
  2368. // // second line
  2369. //
  2370. // and:
  2371. //
  2372. // do { // first line
  2373. // // second line
  2374. // int i;
  2375. // } while (true);
  2376. //
  2377. // and:
  2378. //
  2379. // enum {
  2380. // a, // first line
  2381. // // second line
  2382. // };
  2383. const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
  2384. // Scan for '{//'. If found, use the column of '{' as a min column for line
  2385. // comment section continuation.
  2386. const FormatToken *PreviousToken = nullptr;
  2387. for (const UnwrappedLineNode &Node : Line.Tokens) {
  2388. if (PreviousToken && PreviousToken->is(tok::l_brace) &&
  2389. isLineComment(*Node.Tok)) {
  2390. MinColumnToken = PreviousToken;
  2391. break;
  2392. }
  2393. PreviousToken = Node.Tok;
  2394. // Grab the last newline preceding a token in this unwrapped line.
  2395. if (Node.Tok->NewlinesBefore > 0) {
  2396. MinColumnToken = Node.Tok;
  2397. }
  2398. }
  2399. if (PreviousToken && PreviousToken->is(tok::l_brace)) {
  2400. MinColumnToken = PreviousToken;
  2401. }
  2402. return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
  2403. MinColumnToken);
  2404. }
  2405. void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
  2406. bool JustComments = Line->Tokens.empty();
  2407. for (SmallVectorImpl<FormatToken *>::const_iterator
  2408. I = CommentsBeforeNextToken.begin(),
  2409. E = CommentsBeforeNextToken.end();
  2410. I != E; ++I) {
  2411. // Line comments that belong to the same line comment section are put on the
  2412. // same line since later we might want to reflow content between them.
  2413. // Additional fine-grained breaking of line comment sections is controlled
  2414. // by the class BreakableLineCommentSection in case it is desirable to keep
  2415. // several line comment sections in the same unwrapped line.
  2416. //
  2417. // FIXME: Consider putting separate line comment sections as children to the
  2418. // unwrapped line instead.
  2419. (*I)->ContinuesLineCommentSection =
  2420. continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
  2421. if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
  2422. addUnwrappedLine();
  2423. pushToken(*I);
  2424. }
  2425. if (NewlineBeforeNext && JustComments)
  2426. addUnwrappedLine();
  2427. CommentsBeforeNextToken.clear();
  2428. }
  2429. void UnwrappedLineParser::nextToken(int LevelDifference) {
  2430. if (eof())
  2431. return;
  2432. flushComments(isOnNewLine(*FormatTok));
  2433. pushToken(FormatTok);
  2434. FormatToken *Previous = FormatTok;
  2435. if (Style.Language != FormatStyle::LK_JavaScript)
  2436. readToken(LevelDifference);
  2437. else
  2438. readTokenWithJavaScriptASI();
  2439. FormatTok->Previous = Previous;
  2440. }
  2441. void UnwrappedLineParser::distributeComments(
  2442. const SmallVectorImpl<FormatToken *> &Comments,
  2443. const FormatToken *NextTok) {
  2444. // Whether or not a line comment token continues a line is controlled by
  2445. // the method continuesLineCommentSection, with the following caveat:
  2446. //
  2447. // Define a trail of Comments to be a nonempty proper postfix of Comments such
  2448. // that each comment line from the trail is aligned with the next token, if
  2449. // the next token exists. If a trail exists, the beginning of the maximal
  2450. // trail is marked as a start of a new comment section.
  2451. //
  2452. // For example in this code:
  2453. //
  2454. // int a; // line about a
  2455. // // line 1 about b
  2456. // // line 2 about b
  2457. // int b;
  2458. //
  2459. // the two lines about b form a maximal trail, so there are two sections, the
  2460. // first one consisting of the single comment "// line about a" and the
  2461. // second one consisting of the next two comments.
  2462. if (Comments.empty())
  2463. return;
  2464. bool ShouldPushCommentsInCurrentLine = true;
  2465. bool HasTrailAlignedWithNextToken = false;
  2466. unsigned StartOfTrailAlignedWithNextToken = 0;
  2467. if (NextTok) {
  2468. // We are skipping the first element intentionally.
  2469. for (unsigned i = Comments.size() - 1; i > 0; --i) {
  2470. if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
  2471. HasTrailAlignedWithNextToken = true;
  2472. StartOfTrailAlignedWithNextToken = i;
  2473. }
  2474. }
  2475. }
  2476. for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
  2477. FormatToken *FormatTok = Comments[i];
  2478. if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
  2479. FormatTok->ContinuesLineCommentSection = false;
  2480. } else {
  2481. FormatTok->ContinuesLineCommentSection =
  2482. continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
  2483. }
  2484. if (!FormatTok->ContinuesLineCommentSection &&
  2485. (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
  2486. ShouldPushCommentsInCurrentLine = false;
  2487. }
  2488. if (ShouldPushCommentsInCurrentLine) {
  2489. pushToken(FormatTok);
  2490. } else {
  2491. CommentsBeforeNextToken.push_back(FormatTok);
  2492. }
  2493. }
  2494. }
  2495. void UnwrappedLineParser::readToken(int LevelDifference) {
  2496. SmallVector<FormatToken *, 1> Comments;
  2497. do {
  2498. FormatTok = Tokens->getNextToken();
  2499. assert(FormatTok);
  2500. while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
  2501. (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
  2502. distributeComments(Comments, FormatTok);
  2503. Comments.clear();
  2504. // If there is an unfinished unwrapped line, we flush the preprocessor
  2505. // directives only after that unwrapped line was finished later.
  2506. bool SwitchToPreprocessorLines = !Line->Tokens.empty();
  2507. ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
  2508. assert((LevelDifference >= 0 ||
  2509. static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
  2510. "LevelDifference makes Line->Level negative");
  2511. Line->Level += LevelDifference;
  2512. // Comments stored before the preprocessor directive need to be output
  2513. // before the preprocessor directive, at the same level as the
  2514. // preprocessor directive, as we consider them to apply to the directive.
  2515. if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
  2516. PPBranchLevel > 0)
  2517. Line->Level += PPBranchLevel;
  2518. flushComments(isOnNewLine(*FormatTok));
  2519. parsePPDirective();
  2520. }
  2521. while (FormatTok->Type == TT_ConflictStart ||
  2522. FormatTok->Type == TT_ConflictEnd ||
  2523. FormatTok->Type == TT_ConflictAlternative) {
  2524. if (FormatTok->Type == TT_ConflictStart) {
  2525. conditionalCompilationStart(/*Unreachable=*/false);
  2526. } else if (FormatTok->Type == TT_ConflictAlternative) {
  2527. conditionalCompilationAlternative();
  2528. } else if (FormatTok->Type == TT_ConflictEnd) {
  2529. conditionalCompilationEnd();
  2530. }
  2531. FormatTok = Tokens->getNextToken();
  2532. FormatTok->MustBreakBefore = true;
  2533. }
  2534. if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
  2535. !Line->InPPDirective) {
  2536. continue;
  2537. }
  2538. if (!FormatTok->Tok.is(tok::comment)) {
  2539. distributeComments(Comments, FormatTok);
  2540. Comments.clear();
  2541. return;
  2542. }
  2543. Comments.push_back(FormatTok);
  2544. } while (!eof());
  2545. distributeComments(Comments, nullptr);
  2546. Comments.clear();
  2547. }
  2548. void UnwrappedLineParser::pushToken(FormatToken *Tok) {
  2549. Line->Tokens.push_back(UnwrappedLineNode(Tok));
  2550. if (MustBreakBeforeNextToken) {
  2551. Line->Tokens.back().Tok->MustBreakBefore = true;
  2552. MustBreakBeforeNextToken = false;
  2553. }
  2554. }
  2555. } // end namespace format
  2556. } // end namespace clang