FormatToken.h 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967
  1. //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file contains the declaration of the FormatToken, a wrapper
  11. /// around Token with additional information related to formatting.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
  15. #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
  16. #include "clang/Basic/IdentifierTable.h"
  17. #include "clang/Basic/OperatorPrecedence.h"
  18. #include "clang/Format/Format.h"
  19. #include "clang/Lex/Lexer.h"
  20. #include <memory>
  21. #include <unordered_set>
  22. namespace clang {
  23. namespace format {
  24. #define LIST_TOKEN_TYPES \
  25. TYPE(ArrayInitializerLSquare) \
  26. TYPE(ArraySubscriptLSquare) \
  27. TYPE(AttributeColon) \
  28. TYPE(AttributeParen) \
  29. TYPE(AttributeSquare) \
  30. TYPE(BinaryOperator) \
  31. TYPE(BitFieldColon) \
  32. TYPE(BlockComment) \
  33. TYPE(CastRParen) \
  34. TYPE(ConditionalExpr) \
  35. TYPE(ConflictAlternative) \
  36. TYPE(ConflictEnd) \
  37. TYPE(ConflictStart) \
  38. TYPE(CtorInitializerColon) \
  39. TYPE(CtorInitializerComma) \
  40. TYPE(DesignatedInitializerLSquare) \
  41. TYPE(DesignatedInitializerPeriod) \
  42. TYPE(DictLiteral) \
  43. TYPE(ForEachMacro) \
  44. TYPE(FunctionAnnotationRParen) \
  45. TYPE(FunctionDeclarationName) \
  46. TYPE(FunctionLBrace) \
  47. TYPE(FunctionTypeLParen) \
  48. TYPE(ImplicitStringLiteral) \
  49. TYPE(InheritanceColon) \
  50. TYPE(InheritanceComma) \
  51. TYPE(InlineASMBrace) \
  52. TYPE(InlineASMColon) \
  53. TYPE(JavaAnnotation) \
  54. TYPE(JsComputedPropertyName) \
  55. TYPE(JsExponentiation) \
  56. TYPE(JsExponentiationEqual) \
  57. TYPE(JsFatArrow) \
  58. TYPE(JsNonNullAssertion) \
  59. TYPE(JsPrivateIdentifier) \
  60. TYPE(JsTypeColon) \
  61. TYPE(JsTypeOperator) \
  62. TYPE(JsTypeOptionalQuestion) \
  63. TYPE(LambdaArrow) \
  64. TYPE(LambdaLBrace) \
  65. TYPE(LambdaLSquare) \
  66. TYPE(LeadingJavaAnnotation) \
  67. TYPE(LineComment) \
  68. TYPE(MacroBlockBegin) \
  69. TYPE(MacroBlockEnd) \
  70. TYPE(NamespaceMacro) \
  71. TYPE(ObjCBlockLBrace) \
  72. TYPE(ObjCBlockLParen) \
  73. TYPE(ObjCDecl) \
  74. TYPE(ObjCForIn) \
  75. TYPE(ObjCMethodExpr) \
  76. TYPE(ObjCMethodSpecifier) \
  77. TYPE(ObjCProperty) \
  78. TYPE(ObjCStringLiteral) \
  79. TYPE(OverloadedOperator) \
  80. TYPE(OverloadedOperatorLParen) \
  81. TYPE(PointerOrReference) \
  82. TYPE(PureVirtualSpecifier) \
  83. TYPE(RangeBasedForLoopColon) \
  84. TYPE(RegexLiteral) \
  85. TYPE(SelectorName) \
  86. TYPE(StartOfName) \
  87. TYPE(StatementMacro) \
  88. TYPE(StructuredBindingLSquare) \
  89. TYPE(TemplateCloser) \
  90. TYPE(TemplateOpener) \
  91. TYPE(TemplateString) \
  92. TYPE(ProtoExtensionLSquare) \
  93. TYPE(TrailingAnnotation) \
  94. TYPE(TrailingReturnArrow) \
  95. TYPE(TrailingUnaryOperator) \
  96. TYPE(TypenameMacro) \
  97. TYPE(UnaryOperator) \
  98. TYPE(CSharpStringLiteral) \
  99. TYPE(CSharpNullCoalescing) \
  100. TYPE(Unknown)
  101. enum TokenType {
  102. #define TYPE(X) TT_##X,
  103. LIST_TOKEN_TYPES
  104. #undef TYPE
  105. NUM_TOKEN_TYPES
  106. };
  107. /// Determines the name of a token type.
  108. const char *getTokenTypeName(TokenType Type);
  109. // Represents what type of block a set of braces open.
  110. enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };
  111. // The packing kind of a function's parameters.
  112. enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };
  113. enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };
  114. class TokenRole;
  115. class AnnotatedLine;
  116. /// A wrapper around a \c Token storing information about the
  117. /// whitespace characters preceding it.
  118. struct FormatToken {
  119. FormatToken() {}
  120. /// The \c Token.
  121. Token Tok;
  122. /// The number of newlines immediately before the \c Token.
  123. ///
  124. /// This can be used to determine what the user wrote in the original code
  125. /// and thereby e.g. leave an empty line between two function definitions.
  126. unsigned NewlinesBefore = 0;
  127. /// Whether there is at least one unescaped newline before the \c
  128. /// Token.
  129. bool HasUnescapedNewline = false;
  130. /// The range of the whitespace immediately preceding the \c Token.
  131. SourceRange WhitespaceRange;
  132. /// The offset just past the last '\n' in this token's leading
  133. /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
  134. unsigned LastNewlineOffset = 0;
  135. /// The width of the non-whitespace parts of the token (or its first
  136. /// line for multi-line tokens) in columns.
  137. /// We need this to correctly measure number of columns a token spans.
  138. unsigned ColumnWidth = 0;
  139. /// Contains the width in columns of the last line of a multi-line
  140. /// token.
  141. unsigned LastLineColumnWidth = 0;
  142. /// Whether the token text contains newlines (escaped or not).
  143. bool IsMultiline = false;
  144. /// Indicates that this is the first token of the file.
  145. bool IsFirst = false;
  146. /// Whether there must be a line break before this token.
  147. ///
  148. /// This happens for example when a preprocessor directive ended directly
  149. /// before the token.
  150. bool MustBreakBefore = false;
  151. /// The raw text of the token.
  152. ///
  153. /// Contains the raw token text without leading whitespace and without leading
  154. /// escaped newlines.
  155. StringRef TokenText;
  156. /// Set to \c true if this token is an unterminated literal.
  157. bool IsUnterminatedLiteral = 0;
  158. /// Contains the kind of block if this token is a brace.
  159. BraceBlockKind BlockKind = BK_Unknown;
  160. TokenType Type = TT_Unknown;
  161. /// The number of spaces that should be inserted before this token.
  162. unsigned SpacesRequiredBefore = 0;
  163. /// \c true if it is allowed to break before this token.
  164. bool CanBreakBefore = false;
  165. /// \c true if this is the ">" of "template<..>".
  166. bool ClosesTemplateDeclaration = false;
  167. /// Number of parameters, if this is "(", "[" or "<".
  168. unsigned ParameterCount = 0;
  169. /// Number of parameters that are nested blocks,
  170. /// if this is "(", "[" or "<".
  171. unsigned BlockParameterCount = 0;
  172. /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of
  173. /// the surrounding bracket.
  174. tok::TokenKind ParentBracket = tok::unknown;
  175. /// A token can have a special role that can carry extra information
  176. /// about the token's formatting.
  177. std::unique_ptr<TokenRole> Role;
  178. /// If this is an opening parenthesis, how are the parameters packed?
  179. ParameterPackingKind PackingKind = PPK_Inconclusive;
  180. /// The total length of the unwrapped line up to and including this
  181. /// token.
  182. unsigned TotalLength = 0;
  183. /// The original 0-based column of this token, including expanded tabs.
  184. /// The configured TabWidth is used as tab width.
  185. unsigned OriginalColumn = 0;
  186. /// The length of following tokens until the next natural split point,
  187. /// or the next token that can be broken.
  188. unsigned UnbreakableTailLength = 0;
  189. // FIXME: Come up with a 'cleaner' concept.
  190. /// The binding strength of a token. This is a combined value of
  191. /// operator precedence, parenthesis nesting, etc.
  192. unsigned BindingStrength = 0;
  193. /// The nesting level of this token, i.e. the number of surrounding (),
  194. /// [], {} or <>.
  195. unsigned NestingLevel = 0;
  196. /// The indent level of this token. Copied from the surrounding line.
  197. unsigned IndentLevel = 0;
  198. /// Penalty for inserting a line break before this token.
  199. unsigned SplitPenalty = 0;
  200. /// If this is the first ObjC selector name in an ObjC method
  201. /// definition or call, this contains the length of the longest name.
  202. ///
  203. /// This being set to 0 means that the selectors should not be colon-aligned,
  204. /// e.g. because several of them are block-type.
  205. unsigned LongestObjCSelectorName = 0;
  206. /// If this is the first ObjC selector name in an ObjC method
  207. /// definition or call, this contains the number of parts that the whole
  208. /// selector consist of.
  209. unsigned ObjCSelectorNameParts = 0;
  210. /// The 0-based index of the parameter/argument. For ObjC it is set
  211. /// for the selector name token.
  212. /// For now calculated only for ObjC.
  213. unsigned ParameterIndex = 0;
  214. /// Stores the number of required fake parentheses and the
  215. /// corresponding operator precedence.
  216. ///
  217. /// If multiple fake parentheses start at a token, this vector stores them in
  218. /// reverse order, i.e. inner fake parenthesis first.
  219. SmallVector<prec::Level, 4> FakeLParens;
  220. /// Insert this many fake ) after this token for correct indentation.
  221. unsigned FakeRParens = 0;
  222. /// \c true if this token starts a binary expression, i.e. has at least
  223. /// one fake l_paren with a precedence greater than prec::Unknown.
  224. bool StartsBinaryExpression = false;
  225. /// \c true if this token ends a binary expression.
  226. bool EndsBinaryExpression = false;
  227. /// If this is an operator (or "."/"->") in a sequence of operators
  228. /// with the same precedence, contains the 0-based operator index.
  229. unsigned OperatorIndex = 0;
  230. /// If this is an operator (or "."/"->") in a sequence of operators
  231. /// with the same precedence, points to the next operator.
  232. FormatToken *NextOperator = nullptr;
  233. /// Is this token part of a \c DeclStmt defining multiple variables?
  234. ///
  235. /// Only set if \c Type == \c TT_StartOfName.
  236. bool PartOfMultiVariableDeclStmt = false;
  237. /// Does this line comment continue a line comment section?
  238. ///
  239. /// Only set to true if \c Type == \c TT_LineComment.
  240. bool ContinuesLineCommentSection = false;
  241. /// If this is a bracket, this points to the matching one.
  242. FormatToken *MatchingParen = nullptr;
  243. /// The previous token in the unwrapped line.
  244. FormatToken *Previous = nullptr;
  245. /// The next token in the unwrapped line.
  246. FormatToken *Next = nullptr;
  247. /// If this token starts a block, this contains all the unwrapped lines
  248. /// in it.
  249. SmallVector<AnnotatedLine *, 1> Children;
  250. /// Stores the formatting decision for the token once it was made.
  251. FormatDecision Decision = FD_Unformatted;
  252. /// If \c true, this token has been fully formatted (indented and
  253. /// potentially re-formatted inside), and we do not allow further formatting
  254. /// changes.
  255. bool Finalized = false;
  256. bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
  257. bool is(TokenType TT) const { return Type == TT; }
  258. bool is(const IdentifierInfo *II) const {
  259. return II && II == Tok.getIdentifierInfo();
  260. }
  261. bool is(tok::PPKeywordKind Kind) const {
  262. return Tok.getIdentifierInfo() &&
  263. Tok.getIdentifierInfo()->getPPKeywordID() == Kind;
  264. }
  265. template <typename A, typename B> bool isOneOf(A K1, B K2) const {
  266. return is(K1) || is(K2);
  267. }
  268. template <typename A, typename B, typename... Ts>
  269. bool isOneOf(A K1, B K2, Ts... Ks) const {
  270. return is(K1) || isOneOf(K2, Ks...);
  271. }
  272. template <typename T> bool isNot(T Kind) const { return !is(Kind); }
  273. bool isIf(bool AllowConstexprMacro = true) const {
  274. return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) ||
  275. (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro);
  276. }
  277. bool closesScopeAfterBlock() const {
  278. if (BlockKind == BK_Block)
  279. return true;
  280. if (closesScope())
  281. return Previous->closesScopeAfterBlock();
  282. return false;
  283. }
  284. /// \c true if this token starts a sequence with the given tokens in order,
  285. /// following the ``Next`` pointers, ignoring comments.
  286. template <typename A, typename... Ts>
  287. bool startsSequence(A K1, Ts... Tokens) const {
  288. return startsSequenceInternal(K1, Tokens...);
  289. }
  290. /// \c true if this token ends a sequence with the given tokens in order,
  291. /// following the ``Previous`` pointers, ignoring comments.
  292. /// For example, given tokens [T1, T2, T3], the function returns true if
  293. /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other
  294. /// words, the tokens passed to this function need to the reverse of the
  295. /// order the tokens appear in code.
  296. template <typename A, typename... Ts>
  297. bool endsSequence(A K1, Ts... Tokens) const {
  298. return endsSequenceInternal(K1, Tokens...);
  299. }
  300. bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
  301. bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
  302. return Tok.isObjCAtKeyword(Kind);
  303. }
  304. bool isAccessSpecifier(bool ColonRequired = true) const {
  305. return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
  306. (!ColonRequired || (Next && Next->is(tok::colon)));
  307. }
  308. /// Determine whether the token is a simple-type-specifier.
  309. bool isSimpleTypeSpecifier() const;
  310. bool isObjCAccessSpecifier() const {
  311. return is(tok::at) && Next &&
  312. (Next->isObjCAtKeyword(tok::objc_public) ||
  313. Next->isObjCAtKeyword(tok::objc_protected) ||
  314. Next->isObjCAtKeyword(tok::objc_package) ||
  315. Next->isObjCAtKeyword(tok::objc_private));
  316. }
  317. /// Returns whether \p Tok is ([{ or an opening < of a template or in
  318. /// protos.
  319. bool opensScope() const {
  320. if (is(TT_TemplateString) && TokenText.endswith("${"))
  321. return true;
  322. if (is(TT_DictLiteral) && is(tok::less))
  323. return true;
  324. return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
  325. TT_TemplateOpener);
  326. }
  327. /// Returns whether \p Tok is )]} or a closing > of a template or in
  328. /// protos.
  329. bool closesScope() const {
  330. if (is(TT_TemplateString) && TokenText.startswith("}"))
  331. return true;
  332. if (is(TT_DictLiteral) && is(tok::greater))
  333. return true;
  334. return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
  335. TT_TemplateCloser);
  336. }
  337. /// Returns \c true if this is a "." or "->" accessing a member.
  338. bool isMemberAccess() const {
  339. return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
  340. !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
  341. TT_LambdaArrow);
  342. }
  343. bool isUnaryOperator() const {
  344. switch (Tok.getKind()) {
  345. case tok::plus:
  346. case tok::plusplus:
  347. case tok::minus:
  348. case tok::minusminus:
  349. case tok::exclaim:
  350. case tok::tilde:
  351. case tok::kw_sizeof:
  352. case tok::kw_alignof:
  353. return true;
  354. default:
  355. return false;
  356. }
  357. }
  358. bool isBinaryOperator() const {
  359. // Comma is a binary operator, but does not behave as such wrt. formatting.
  360. return getPrecedence() > prec::Comma;
  361. }
  362. bool isTrailingComment() const {
  363. return is(tok::comment) &&
  364. (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0);
  365. }
  366. /// Returns \c true if this is a keyword that can be used
  367. /// like a function call (e.g. sizeof, typeid, ...).
  368. bool isFunctionLikeKeyword() const {
  369. switch (Tok.getKind()) {
  370. case tok::kw_throw:
  371. case tok::kw_typeid:
  372. case tok::kw_return:
  373. case tok::kw_sizeof:
  374. case tok::kw_alignof:
  375. case tok::kw_alignas:
  376. case tok::kw_decltype:
  377. case tok::kw_noexcept:
  378. case tok::kw_static_assert:
  379. case tok::kw___attribute:
  380. return true;
  381. default:
  382. return false;
  383. }
  384. }
  385. /// Returns \c true if this is a string literal that's like a label,
  386. /// e.g. ends with "=" or ":".
  387. bool isLabelString() const {
  388. if (!is(tok::string_literal))
  389. return false;
  390. StringRef Content = TokenText;
  391. if (Content.startswith("\"") || Content.startswith("'"))
  392. Content = Content.drop_front(1);
  393. if (Content.endswith("\"") || Content.endswith("'"))
  394. Content = Content.drop_back(1);
  395. Content = Content.trim();
  396. return Content.size() > 1 &&
  397. (Content.back() == ':' || Content.back() == '=');
  398. }
  399. /// Returns actual token start location without leading escaped
  400. /// newlines and whitespace.
  401. ///
  402. /// This can be different to Tok.getLocation(), which includes leading escaped
  403. /// newlines.
  404. SourceLocation getStartOfNonWhitespace() const {
  405. return WhitespaceRange.getEnd();
  406. }
  407. prec::Level getPrecedence() const {
  408. return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
  409. /*CPlusPlus11=*/true);
  410. }
  411. /// Returns the previous token ignoring comments.
  412. FormatToken *getPreviousNonComment() const {
  413. FormatToken *Tok = Previous;
  414. while (Tok && Tok->is(tok::comment))
  415. Tok = Tok->Previous;
  416. return Tok;
  417. }
  418. /// Returns the next token ignoring comments.
  419. const FormatToken *getNextNonComment() const {
  420. const FormatToken *Tok = Next;
  421. while (Tok && Tok->is(tok::comment))
  422. Tok = Tok->Next;
  423. return Tok;
  424. }
  425. /// Returns \c true if this tokens starts a block-type list, i.e. a
  426. /// list that should be indented with a block indent.
  427. bool opensBlockOrBlockTypeList(const FormatStyle &Style) const {
  428. if (is(TT_TemplateString) && opensScope())
  429. return true;
  430. return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
  431. (is(tok::l_brace) &&
  432. (BlockKind == BK_Block || is(TT_DictLiteral) ||
  433. (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
  434. (is(tok::less) && (Style.Language == FormatStyle::LK_Proto ||
  435. Style.Language == FormatStyle::LK_TextProto));
  436. }
  437. /// Returns whether the token is the left square bracket of a C++
  438. /// structured binding declaration.
  439. bool isCppStructuredBinding(const FormatStyle &Style) const {
  440. if (!Style.isCpp() || isNot(tok::l_square))
  441. return false;
  442. const FormatToken *T = this;
  443. do {
  444. T = T->getPreviousNonComment();
  445. } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp,
  446. tok::ampamp));
  447. return T && T->is(tok::kw_auto);
  448. }
  449. /// Same as opensBlockOrBlockTypeList, but for the closing token.
  450. bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {
  451. if (is(TT_TemplateString) && closesScope())
  452. return true;
  453. return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
  454. }
  455. /// Return the actual namespace token, if this token starts a namespace
  456. /// block.
  457. const FormatToken *getNamespaceToken() const {
  458. const FormatToken *NamespaceTok = this;
  459. if (is(tok::comment))
  460. NamespaceTok = NamespaceTok->getNextNonComment();
  461. // Detect "(inline|export)? namespace" in the beginning of a line.
  462. if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export))
  463. NamespaceTok = NamespaceTok->getNextNonComment();
  464. return NamespaceTok &&
  465. NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro)
  466. ? NamespaceTok
  467. : nullptr;
  468. }
  469. private:
  470. // Disallow copying.
  471. FormatToken(const FormatToken &) = delete;
  472. void operator=(const FormatToken &) = delete;
  473. template <typename A, typename... Ts>
  474. bool startsSequenceInternal(A K1, Ts... Tokens) const {
  475. if (is(tok::comment) && Next)
  476. return Next->startsSequenceInternal(K1, Tokens...);
  477. return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
  478. }
  479. template <typename A> bool startsSequenceInternal(A K1) const {
  480. if (is(tok::comment) && Next)
  481. return Next->startsSequenceInternal(K1);
  482. return is(K1);
  483. }
  484. template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const {
  485. if (is(tok::comment) && Previous)
  486. return Previous->endsSequenceInternal(K1);
  487. return is(K1);
  488. }
  489. template <typename A, typename... Ts>
  490. bool endsSequenceInternal(A K1, Ts... Tokens) const {
  491. if (is(tok::comment) && Previous)
  492. return Previous->endsSequenceInternal(K1, Tokens...);
  493. return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
  494. }
  495. };
  496. class ContinuationIndenter;
  497. struct LineState;
  498. class TokenRole {
  499. public:
  500. TokenRole(const FormatStyle &Style) : Style(Style) {}
  501. virtual ~TokenRole();
  502. /// After the \c TokenAnnotator has finished annotating all the tokens,
  503. /// this function precomputes required information for formatting.
  504. virtual void precomputeFormattingInfos(const FormatToken *Token);
  505. /// Apply the special formatting that the given role demands.
  506. ///
  507. /// Assumes that the token having this role is already formatted.
  508. ///
  509. /// Continues formatting from \p State leaving indentation to \p Indenter and
  510. /// returns the total penalty that this formatting incurs.
  511. virtual unsigned formatFromToken(LineState &State,
  512. ContinuationIndenter *Indenter,
  513. bool DryRun) {
  514. return 0;
  515. }
  516. /// Same as \c formatFromToken, but assumes that the first token has
  517. /// already been set thereby deciding on the first line break.
  518. virtual unsigned formatAfterToken(LineState &State,
  519. ContinuationIndenter *Indenter,
  520. bool DryRun) {
  521. return 0;
  522. }
  523. /// Notifies the \c Role that a comma was found.
  524. virtual void CommaFound(const FormatToken *Token) {}
  525. virtual const FormatToken *lastComma() { return nullptr; }
  526. protected:
  527. const FormatStyle &Style;
  528. };
  529. class CommaSeparatedList : public TokenRole {
  530. public:
  531. CommaSeparatedList(const FormatStyle &Style)
  532. : TokenRole(Style), HasNestedBracedList(false) {}
  533. void precomputeFormattingInfos(const FormatToken *Token) override;
  534. unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
  535. bool DryRun) override;
  536. unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
  537. bool DryRun) override;
  538. /// Adds \p Token as the next comma to the \c CommaSeparated list.
  539. void CommaFound(const FormatToken *Token) override {
  540. Commas.push_back(Token);
  541. }
  542. const FormatToken *lastComma() override {
  543. if (Commas.empty())
  544. return nullptr;
  545. return Commas.back();
  546. }
  547. private:
  548. /// A struct that holds information on how to format a given list with
  549. /// a specific number of columns.
  550. struct ColumnFormat {
  551. /// The number of columns to use.
  552. unsigned Columns;
  553. /// The total width in characters.
  554. unsigned TotalWidth;
  555. /// The number of lines required for this format.
  556. unsigned LineCount;
  557. /// The size of each column in characters.
  558. SmallVector<unsigned, 8> ColumnSizes;
  559. };
  560. /// Calculate which \c ColumnFormat fits best into
  561. /// \p RemainingCharacters.
  562. const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
  563. /// The ordered \c FormatTokens making up the commas of this list.
  564. SmallVector<const FormatToken *, 8> Commas;
  565. /// The length of each of the list's items in characters including the
  566. /// trailing comma.
  567. SmallVector<unsigned, 8> ItemLengths;
  568. /// Precomputed formats that can be used for this list.
  569. SmallVector<ColumnFormat, 4> Formats;
  570. bool HasNestedBracedList;
  571. };
  572. /// Encapsulates keywords that are context sensitive or for languages not
  573. /// properly supported by Clang's lexer.
  574. struct AdditionalKeywords {
  575. AdditionalKeywords(IdentifierTable &IdentTable) {
  576. kw_final = &IdentTable.get("final");
  577. kw_override = &IdentTable.get("override");
  578. kw_in = &IdentTable.get("in");
  579. kw_of = &IdentTable.get("of");
  580. kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM");
  581. kw_CF_ENUM = &IdentTable.get("CF_ENUM");
  582. kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
  583. kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM");
  584. kw_NS_ENUM = &IdentTable.get("NS_ENUM");
  585. kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
  586. kw_as = &IdentTable.get("as");
  587. kw_async = &IdentTable.get("async");
  588. kw_await = &IdentTable.get("await");
  589. kw_declare = &IdentTable.get("declare");
  590. kw_finally = &IdentTable.get("finally");
  591. kw_from = &IdentTable.get("from");
  592. kw_function = &IdentTable.get("function");
  593. kw_get = &IdentTable.get("get");
  594. kw_import = &IdentTable.get("import");
  595. kw_infer = &IdentTable.get("infer");
  596. kw_is = &IdentTable.get("is");
  597. kw_let = &IdentTable.get("let");
  598. kw_module = &IdentTable.get("module");
  599. kw_readonly = &IdentTable.get("readonly");
  600. kw_set = &IdentTable.get("set");
  601. kw_type = &IdentTable.get("type");
  602. kw_typeof = &IdentTable.get("typeof");
  603. kw_var = &IdentTable.get("var");
  604. kw_yield = &IdentTable.get("yield");
  605. kw_abstract = &IdentTable.get("abstract");
  606. kw_assert = &IdentTable.get("assert");
  607. kw_extends = &IdentTable.get("extends");
  608. kw_implements = &IdentTable.get("implements");
  609. kw_instanceof = &IdentTable.get("instanceof");
  610. kw_interface = &IdentTable.get("interface");
  611. kw_native = &IdentTable.get("native");
  612. kw_package = &IdentTable.get("package");
  613. kw_synchronized = &IdentTable.get("synchronized");
  614. kw_throws = &IdentTable.get("throws");
  615. kw___except = &IdentTable.get("__except");
  616. kw___has_include = &IdentTable.get("__has_include");
  617. kw___has_include_next = &IdentTable.get("__has_include_next");
  618. kw_mark = &IdentTable.get("mark");
  619. kw_extend = &IdentTable.get("extend");
  620. kw_option = &IdentTable.get("option");
  621. kw_optional = &IdentTable.get("optional");
  622. kw_repeated = &IdentTable.get("repeated");
  623. kw_required = &IdentTable.get("required");
  624. kw_returns = &IdentTable.get("returns");
  625. kw_signals = &IdentTable.get("signals");
  626. kw_qsignals = &IdentTable.get("Q_SIGNALS");
  627. kw_slots = &IdentTable.get("slots");
  628. kw_qslots = &IdentTable.get("Q_SLOTS");
  629. // C# keywords
  630. kw_dollar = &IdentTable.get("dollar");
  631. kw_base = &IdentTable.get("base");
  632. kw_byte = &IdentTable.get("byte");
  633. kw_checked = &IdentTable.get("checked");
  634. kw_decimal = &IdentTable.get("decimal");
  635. kw_delegate = &IdentTable.get("delegate");
  636. kw_event = &IdentTable.get("event");
  637. kw_fixed = &IdentTable.get("fixed");
  638. kw_foreach = &IdentTable.get("foreach");
  639. kw_implicit = &IdentTable.get("implicit");
  640. kw_internal = &IdentTable.get("internal");
  641. kw_lock = &IdentTable.get("lock");
  642. kw_null = &IdentTable.get("null");
  643. kw_object = &IdentTable.get("object");
  644. kw_out = &IdentTable.get("out");
  645. kw_params = &IdentTable.get("params");
  646. kw_ref = &IdentTable.get("ref");
  647. kw_string = &IdentTable.get("string");
  648. kw_stackalloc = &IdentTable.get("stackalloc");
  649. kw_sbyte = &IdentTable.get("sbyte");
  650. kw_sealed = &IdentTable.get("sealed");
  651. kw_uint = &IdentTable.get("uint");
  652. kw_ulong = &IdentTable.get("ulong");
  653. kw_unchecked = &IdentTable.get("unchecked");
  654. kw_unsafe = &IdentTable.get("unsafe");
  655. kw_ushort = &IdentTable.get("ushort");
  656. // Keep this at the end of the constructor to make sure everything here
  657. // is
  658. // already initialized.
  659. JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
  660. {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
  661. kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
  662. kw_set, kw_type, kw_typeof, kw_var, kw_yield,
  663. // Keywords from the Java section.
  664. kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
  665. CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>(
  666. {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event,
  667. kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal,
  668. kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params,
  669. kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed,
  670. kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort,
  671. // Keywords from the JavaScript section.
  672. kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
  673. kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
  674. kw_set, kw_type, kw_typeof, kw_var, kw_yield,
  675. // Keywords from the Java section.
  676. kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
  677. }
  678. // Context sensitive keywords.
  679. IdentifierInfo *kw_final;
  680. IdentifierInfo *kw_override;
  681. IdentifierInfo *kw_in;
  682. IdentifierInfo *kw_of;
  683. IdentifierInfo *kw_CF_CLOSED_ENUM;
  684. IdentifierInfo *kw_CF_ENUM;
  685. IdentifierInfo *kw_CF_OPTIONS;
  686. IdentifierInfo *kw_NS_CLOSED_ENUM;
  687. IdentifierInfo *kw_NS_ENUM;
  688. IdentifierInfo *kw_NS_OPTIONS;
  689. IdentifierInfo *kw___except;
  690. IdentifierInfo *kw___has_include;
  691. IdentifierInfo *kw___has_include_next;
  692. // JavaScript keywords.
  693. IdentifierInfo *kw_as;
  694. IdentifierInfo *kw_async;
  695. IdentifierInfo *kw_await;
  696. IdentifierInfo *kw_declare;
  697. IdentifierInfo *kw_finally;
  698. IdentifierInfo *kw_from;
  699. IdentifierInfo *kw_function;
  700. IdentifierInfo *kw_get;
  701. IdentifierInfo *kw_import;
  702. IdentifierInfo *kw_infer;
  703. IdentifierInfo *kw_is;
  704. IdentifierInfo *kw_let;
  705. IdentifierInfo *kw_module;
  706. IdentifierInfo *kw_readonly;
  707. IdentifierInfo *kw_set;
  708. IdentifierInfo *kw_type;
  709. IdentifierInfo *kw_typeof;
  710. IdentifierInfo *kw_var;
  711. IdentifierInfo *kw_yield;
  712. // Java keywords.
  713. IdentifierInfo *kw_abstract;
  714. IdentifierInfo *kw_assert;
  715. IdentifierInfo *kw_extends;
  716. IdentifierInfo *kw_implements;
  717. IdentifierInfo *kw_instanceof;
  718. IdentifierInfo *kw_interface;
  719. IdentifierInfo *kw_native;
  720. IdentifierInfo *kw_package;
  721. IdentifierInfo *kw_synchronized;
  722. IdentifierInfo *kw_throws;
  723. // Pragma keywords.
  724. IdentifierInfo *kw_mark;
  725. // Proto keywords.
  726. IdentifierInfo *kw_extend;
  727. IdentifierInfo *kw_option;
  728. IdentifierInfo *kw_optional;
  729. IdentifierInfo *kw_repeated;
  730. IdentifierInfo *kw_required;
  731. IdentifierInfo *kw_returns;
  732. // QT keywords.
  733. IdentifierInfo *kw_signals;
  734. IdentifierInfo *kw_qsignals;
  735. IdentifierInfo *kw_slots;
  736. IdentifierInfo *kw_qslots;
  737. // C# keywords
  738. IdentifierInfo *kw_dollar;
  739. IdentifierInfo *kw_base;
  740. IdentifierInfo *kw_byte;
  741. IdentifierInfo *kw_checked;
  742. IdentifierInfo *kw_decimal;
  743. IdentifierInfo *kw_delegate;
  744. IdentifierInfo *kw_event;
  745. IdentifierInfo *kw_fixed;
  746. IdentifierInfo *kw_foreach;
  747. IdentifierInfo *kw_implicit;
  748. IdentifierInfo *kw_internal;
  749. IdentifierInfo *kw_lock;
  750. IdentifierInfo *kw_null;
  751. IdentifierInfo *kw_object;
  752. IdentifierInfo *kw_out;
  753. IdentifierInfo *kw_params;
  754. IdentifierInfo *kw_ref;
  755. IdentifierInfo *kw_string;
  756. IdentifierInfo *kw_stackalloc;
  757. IdentifierInfo *kw_sbyte;
  758. IdentifierInfo *kw_sealed;
  759. IdentifierInfo *kw_uint;
  760. IdentifierInfo *kw_ulong;
  761. IdentifierInfo *kw_unchecked;
  762. IdentifierInfo *kw_unsafe;
  763. IdentifierInfo *kw_ushort;
  764. /// Returns \c true if \p Tok is a true JavaScript identifier, returns
  765. /// \c false if it is a keyword or a pseudo keyword.
  766. bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
  767. return Tok.is(tok::identifier) &&
  768. JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
  769. JsExtraKeywords.end();
  770. }
  771. /// Returns \c true if \p Tok is a C# keyword, returns
  772. /// \c false if it is a anything else.
  773. bool isCSharpKeyword(const FormatToken &Tok) const {
  774. switch (Tok.Tok.getKind()) {
  775. case tok::kw_bool:
  776. case tok::kw_break:
  777. case tok::kw_case:
  778. case tok::kw_catch:
  779. case tok::kw_char:
  780. case tok::kw_class:
  781. case tok::kw_const:
  782. case tok::kw_continue:
  783. case tok::kw_default:
  784. case tok::kw_do:
  785. case tok::kw_double:
  786. case tok::kw_else:
  787. case tok::kw_enum:
  788. case tok::kw_explicit:
  789. case tok::kw_extern:
  790. case tok::kw_false:
  791. case tok::kw_float:
  792. case tok::kw_for:
  793. case tok::kw_goto:
  794. case tok::kw_if:
  795. case tok::kw_int:
  796. case tok::kw_long:
  797. case tok::kw_namespace:
  798. case tok::kw_new:
  799. case tok::kw_operator:
  800. case tok::kw_private:
  801. case tok::kw_protected:
  802. case tok::kw_public:
  803. case tok::kw_return:
  804. case tok::kw_short:
  805. case tok::kw_sizeof:
  806. case tok::kw_static:
  807. case tok::kw_struct:
  808. case tok::kw_switch:
  809. case tok::kw_this:
  810. case tok::kw_throw:
  811. case tok::kw_true:
  812. case tok::kw_try:
  813. case tok::kw_typeof:
  814. case tok::kw_using:
  815. case tok::kw_virtual:
  816. case tok::kw_void:
  817. case tok::kw_volatile:
  818. case tok::kw_while:
  819. return true;
  820. default:
  821. return Tok.is(tok::identifier) &&
  822. CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
  823. CSharpExtraKeywords.end();
  824. }
  825. }
  826. private:
  827. /// The JavaScript keywords beyond the C++ keyword set.
  828. std::unordered_set<IdentifierInfo *> JsExtraKeywords;
  829. /// The C# keywords beyond the C++ keyword set
  830. std::unordered_set<IdentifierInfo *> CSharpExtraKeywords;
  831. };
  832. } // namespace format
  833. } // namespace clang
  834. #endif