FormatTokenLexer.h 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains FormatTokenLexer, which tokenizes a source file
/// into a token stream suitable for ClangFormat.
///
//===----------------------------------------------------------------------===//
  14. #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  15. #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
  16. #include "Encoding.h"
  17. #include "FormatToken.h"
  18. #include "clang/Basic/SourceLocation.h"
  19. #include "clang/Basic/SourceManager.h"
  20. #include "clang/Format/Format.h"
  21. #include "llvm/ADT/MapVector.h"
  22. #include "llvm/Support/Regex.h"
  23. #include <stack>
  24. namespace clang {
  25. namespace format {
  /// Lexing states tracked by FormatTokenLexer on its StateStack.
  ///
  /// Kept as an unscoped enum so that existing uses of the bare enumerator
  /// names keep compiling.
  enum LexerState {
    /// Default state.
    NORMAL,
    /// NOTE(review): presumably set while lexing the contents of a JavaScript
    /// template string (see FormatTokenLexer::handleTemplateStrings) - confirm
    /// against the implementation.
    TEMPLATE_STRING,
    /// NOTE(review): presumably signals that a token has been stashed and is
    /// to be handed out by FormatTokenLexer::getStashedToken - confirm against
    /// the implementation.
    TOKEN_STASHED,
  };
  31. class FormatTokenLexer {
  32. public:
  33. FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
  34. const FormatStyle &Style, encoding::Encoding Encoding);
  35. ArrayRef<FormatToken *> lex();
  36. const AdditionalKeywords &getKeywords() { return Keywords; }
  37. private:
  38. void tryMergePreviousTokens();
  39. bool tryMergeLessLess();
  40. bool tryMergeNSStringLiteral();
  41. bool tryMergeJSPrivateIdentifier();
  42. bool tryMergeCSharpVerbatimStringLiteral();
  43. bool tryMergeCSharpKeywordVariables();
  44. bool tryMergeCSharpNullConditionals();
  45. bool tryMergeCSharpDoubleQuestion();
  46. bool tryTransformCSharpForEach();
  47. bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
  48. // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
  49. bool precedesOperand(FormatToken *Tok);
  50. bool canPrecedeRegexLiteral(FormatToken *Prev);
  51. // Tries to parse a JavaScript Regex literal starting at the current token,
  52. // if that begins with a slash and is in a location where JavaScript allows
  53. // regex literals. Changes the current token to a regex literal and updates
  54. // its text if successful.
  55. void tryParseJSRegexLiteral();
  56. // Handles JavaScript template strings.
  57. //
  58. // JavaScript template strings use backticks ('`') as delimiters, and allow
  59. // embedding expressions nested in ${expr-here}. Template strings can be
  60. // nested recursively, i.e. expressions can contain template strings in turn.
  61. //
  62. // The code below parses starting from a backtick, up to a closing backtick or
  63. // an opening ${. It also maintains a stack of lexing contexts to handle
  64. // nested template parts by balancing curly braces.
  65. void handleTemplateStrings();
  66. void tryParsePythonComment();
  67. bool tryMerge_TMacro();
  68. bool tryMergeConflictMarkers();
  69. FormatToken *getStashedToken();
  70. FormatToken *getNextToken();
  71. FormatToken *FormatTok;
  72. bool IsFirstToken;
  73. std::stack<LexerState> StateStack;
  74. unsigned Column;
  75. unsigned TrailingWhitespace;
  76. std::unique_ptr<Lexer> Lex;
  77. const SourceManager &SourceMgr;
  78. FileID ID;
  79. const FormatStyle &Style;
  80. IdentifierTable IdentTable;
  81. AdditionalKeywords Keywords;
  82. encoding::Encoding Encoding;
  83. llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
  84. // Index (in 'Tokens') of the last token that starts a new line.
  85. unsigned FirstInLineIndex;
  86. SmallVector<FormatToken *, 16> Tokens;
  87. llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
  88. bool FormattingDisabled;
  89. llvm::Regex MacroBlockBeginRegex;
  90. llvm::Regex MacroBlockEndRegex;
  91. void readRawToken(FormatToken &Tok);
  92. void resetLexer(unsigned Offset);
  93. };
  94. } // namespace format
  95. } // namespace clang
  96. #endif