//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WhitespaceManager class manages whitespace around tokens and their
/// replacements.
///
//===----------------------------------------------------------------------===//

  14. #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
  15. #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
  16. #include "TokenAnnotator.h"
  17. #include "clang/Basic/SourceManager.h"
  18. #include "clang/Format/Format.h"
  19. #include <string>
  20. namespace clang {
  21. namespace format {
  22. /// Manages the whitespaces around tokens and their replacements.
  23. ///
  24. /// This includes special handling for certain constructs, e.g. the alignment of
  25. /// trailing line comments.
  26. ///
  27. /// To guarantee correctness of alignment operations, the \c WhitespaceManager
  28. /// must be informed about every token in the source file; for each token, there
  29. /// must be exactly one call to either \c replaceWhitespace or
  30. /// \c addUntouchableToken.
  31. ///
  32. /// There may be multiple calls to \c breakToken for a given token.
  33. class WhitespaceManager {
  34. public:
  35. WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
  36. bool UseCRLF)
  37. : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
  38. bool useCRLF() const { return UseCRLF; }
  39. /// Replaces the whitespace in front of \p Tok. Only call once for
  40. /// each \c AnnotatedToken.
  41. ///
  42. /// \p StartOfTokenColumn is the column at which the token will start after
  43. /// this replacement. It is needed for determining how \p Spaces is turned
  44. /// into tabs and spaces for some format styles.
  45. void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
  46. unsigned StartOfTokenColumn,
  47. bool InPPDirective = false);
  48. /// Adds information about an unchangeable token's whitespace.
  49. ///
  50. /// Needs to be called for every token for which \c replaceWhitespace
  51. /// was not called.
  52. void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
  53. llvm::Error addReplacement(const tooling::Replacement &Replacement);
  54. /// Inserts or replaces whitespace in the middle of a token.
  55. ///
  56. /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
  57. /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
  58. /// characters.
  59. ///
  60. /// Note: \p Spaces can be negative to retain information about initial
  61. /// relative column offset between a line of a block comment and the start of
  62. /// the comment. This negative offset may be compensated by trailing comment
  63. /// alignment here. In all other cases negative \p Spaces will be truncated to
  64. /// 0.
  65. ///
  66. /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
  67. /// used to align backslashes correctly.
  68. void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
  69. unsigned ReplaceChars,
  70. StringRef PreviousPostfix,
  71. StringRef CurrentPrefix, bool InPPDirective,
  72. unsigned Newlines, int Spaces);
  73. /// Returns all the \c Replacements created during formatting.
  74. const tooling::Replacements &generateReplacements();
  75. /// Represents a change before a token, a break inside a token,
  76. /// or the layout of an unchanged token (or whitespace within).
  77. struct Change {
  78. /// Functor to sort changes in original source order.
  79. class IsBeforeInFile {
  80. public:
  81. IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
  82. bool operator()(const Change &C1, const Change &C2) const;
  83. private:
  84. const SourceManager &SourceMgr;
  85. };
  86. /// Creates a \c Change.
  87. ///
  88. /// The generated \c Change will replace the characters at
  89. /// \p OriginalWhitespaceRange with a concatenation of
  90. /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
  91. /// and \p CurrentLinePrefix.
  92. ///
  93. /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
  94. /// trailing comments and escaped newlines.
  95. Change(const FormatToken &Tok, bool CreateReplacement,
  96. SourceRange OriginalWhitespaceRange, int Spaces,
  97. unsigned StartOfTokenColumn, unsigned NewlinesBefore,
  98. StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
  99. bool ContinuesPPDirective, bool IsInsideToken);
  100. // The kind of the token whose whitespace this change replaces, or in which
  101. // this change inserts whitespace.
  102. // FIXME: Currently this is not set correctly for breaks inside comments, as
  103. // the \c BreakableToken is still doing its own alignment.
  104. const FormatToken *Tok;
  105. bool CreateReplacement;
  106. // Changes might be in the middle of a token, so we cannot just keep the
  107. // FormatToken around to query its information.
  108. SourceRange OriginalWhitespaceRange;
  109. unsigned StartOfTokenColumn;
  110. unsigned NewlinesBefore;
  111. std::string PreviousLinePostfix;
  112. std::string CurrentLinePrefix;
  113. bool ContinuesPPDirective;
  114. // The number of spaces in front of the token or broken part of the token.
  115. // This will be adapted when aligning tokens.
  116. // Can be negative to retain information about the initial relative offset
  117. // of the lines in a block comment. This is used when aligning trailing
  118. // comments. Uncompensated negative offset is truncated to 0.
  119. int Spaces;
  120. // If this change is inside of a token but not at the start of the token or
  121. // directly after a newline.
  122. bool IsInsideToken;
  123. // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
  124. // \c EscapedNewlineColumn will be calculated in
  125. // \c calculateLineBreakInformation.
  126. bool IsTrailingComment;
  127. unsigned TokenLength;
  128. unsigned PreviousEndOfTokenColumn;
  129. unsigned EscapedNewlineColumn;
  130. // These fields are used to retain correct relative line indentation in a
  131. // block comment when aligning trailing comments.
  132. //
  133. // If this Change represents a continuation of a block comment,
  134. // \c StartOfBlockComment is pointer to the first Change in the block
  135. // comment. \c IndentationOffset is a relative column offset to this
  136. // change, so that the correct column can be reconstructed at the end of
  137. // the alignment process.
  138. const Change *StartOfBlockComment;
  139. int IndentationOffset;
  140. // A combination of indent level and nesting level, which are used in
  141. // tandem to compute lexical scope, for the purposes of deciding
  142. // when to stop consecutive alignment runs.
  143. std::pair<unsigned, unsigned> indentAndNestingLevel() const {
  144. return std::make_pair(Tok->IndentLevel, Tok->NestingLevel);
  145. }
  146. };
  147. private:
  148. /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
  149. /// or token parts in a line and \c PreviousEndOfTokenColumn and
  150. /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
  151. void calculateLineBreakInformation();
  152. /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes.
  153. void alignConsecutiveMacros();
  154. /// Align consecutive assignments over all \c Changes.
  155. void alignConsecutiveAssignments();
  156. /// Align consecutive declarations over all \c Changes.
  157. void alignConsecutiveDeclarations();
  158. /// Align trailing comments over all \c Changes.
  159. void alignTrailingComments();
  160. /// Align trailing comments from change \p Start to change \p End at
  161. /// the specified \p Column.
  162. void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
  163. /// Align escaped newlines over all \c Changes.
  164. void alignEscapedNewlines();
  165. /// Align escaped newlines from change \p Start to change \p End at
  166. /// the specified \p Column.
  167. void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
  168. /// Fill \c Replaces with the replacements for all effective changes.
  169. void generateChanges();
  170. /// Stores \p Text as the replacement for the whitespace in \p Range.
  171. void storeReplacement(SourceRange Range, StringRef Text);
  172. void appendNewlineText(std::string &Text, unsigned Newlines);
  173. void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
  174. unsigned PreviousEndOfTokenColumn,
  175. unsigned EscapedNewlineColumn);
  176. void appendIndentText(std::string &Text, unsigned IndentLevel,
  177. unsigned Spaces, unsigned WhitespaceStartColumn);
  178. SmallVector<Change, 16> Changes;
  179. const SourceManager &SourceMgr;
  180. tooling::Replacements Replaces;
  181. const FormatStyle &Style;
  182. bool UseCRLF;
  183. };
  184. } // namespace format
  185. } // namespace clang
  186. #endif