浏览代码

[clang-format] Adds a canonical delimiter to raw string formatting

Summary:
This patch adds canonical delimiter support to the raw string formatting.
This allows matching delimiters to be updated to the canonical one.

Reviewers: bkramer

Reviewed By: bkramer

Subscribers: klimek, cfe-commits

Differential Revision: https://reviews.llvm.org/D42187

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@322956 91177308-0d34-0410-b5e6-96231b3b80d8
Krasimir Georgiev 7 年之前
父节点
当前提交
7de2463761

+ 4 - 0
docs/ClangFormatStyleOptions.rst

@@ -1590,6 +1590,9 @@ the configuration (without a prefix: ``Auto``).
   precedence over a matching enclosing function name for determining the
   precedence over a matching enclosing function name for determining the
   language of the raw string contents.
   language of the raw string contents.
 
 
+  If a canonical delimiter is specified, occurrences of other delimiters for
+  the same language will be updated to the canonical one if possible.
+
   There should be at most one specification per language and each delimiter
   There should be at most one specification per language and each delimiter
   and enclosing function should not occur in multiple specifications.
   and enclosing function should not occur in multiple specifications.
 
 
@@ -1610,6 +1613,7 @@ the configuration (without a prefix: ``Auto``).
             - 'cc'
             - 'cc'
             - 'cpp'
             - 'cpp'
           BasedOnStyle: llvm
           BasedOnStyle: llvm
+          CanonicalDelimiter: 'cc'
 
 
 **ReflowComments** (``bool``)
 **ReflowComments** (``bool``)
   If ``true``, clang-format will attempt to re-flow comments.
   If ``true``, clang-format will attempt to re-flow comments.

+ 7 - 0
include/clang/Format/Format.h

@@ -1369,6 +1369,8 @@ struct FormatStyle {
     std::vector<std::string> Delimiters;
     std::vector<std::string> Delimiters;
     /// \brief A list of enclosing function names that match this language.
     /// \brief A list of enclosing function names that match this language.
     std::vector<std::string> EnclosingFunctions;
     std::vector<std::string> EnclosingFunctions;
+    /// \brief The canonical delimiter for this language.
+    std::string CanonicalDelimiter;
     /// \brief The style name on which this raw string format is based on.
     /// \brief The style name on which this raw string format is based on.
     /// If not specified, the raw string format is based on the style that this
     /// If not specified, the raw string format is based on the style that this
     /// format is based on.
     /// format is based on.
@@ -1376,6 +1378,7 @@ struct FormatStyle {
     bool operator==(const RawStringFormat &Other) const {
     bool operator==(const RawStringFormat &Other) const {
       return Language == Other.Language && Delimiters == Other.Delimiters &&
       return Language == Other.Language && Delimiters == Other.Delimiters &&
              EnclosingFunctions == Other.EnclosingFunctions &&
              EnclosingFunctions == Other.EnclosingFunctions &&
+             CanonicalDelimiter == Other.CanonicalDelimiter &&
              BasedOnStyle == Other.BasedOnStyle;
              BasedOnStyle == Other.BasedOnStyle;
     }
     }
   };
   };
@@ -1392,6 +1395,9 @@ struct FormatStyle {
   /// precedence over a matching enclosing function name for determining the
   /// precedence over a matching enclosing function name for determining the
   /// language of the raw string contents.
   /// language of the raw string contents.
   ///
   ///
+  /// If a canonical delimiter is specified, occurrences of other delimiters for
+  /// the same language will be updated to the canonical one if possible.
+  ///
   /// There should be at most one specification per language and each delimiter
   /// There should be at most one specification per language and each delimiter
   /// and enclosing function should not occur in multiple specifications.
   /// and enclosing function should not occur in multiple specifications.
   ///
   ///
@@ -1410,6 +1416,7 @@ struct FormatStyle {
   ///           - 'cc'
   ///           - 'cc'
   ///           - 'cpp'
   ///           - 'cpp'
   ///         BasedOnStyle: llvm
   ///         BasedOnStyle: llvm
+  ///         CanonicalDelimiter: 'cc'
   /// \endcode
   /// \endcode
   std::vector<RawStringFormat> RawStringFormats;
   std::vector<RawStringFormat> RawStringFormats;
 
 

+ 64 - 12
lib/Format/ContinuationIndenter.cpp

@@ -102,6 +102,18 @@ static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) {
   return Delimiter;
   return Delimiter;
 }
 }
 
 
+// Returns the canonical delimiter of the first raw string format in \p Style
+// matching \p Language; empty if no format matches or none is specified.
+static StringRef
+getCanonicalRawStringDelimiter(const FormatStyle &Style,
+                               FormatStyle::LanguageKind Language) {
+  for (const auto &Format : Style.RawStringFormats) {
+    if (Format.Language == Language)
+      return StringRef(Format.CanonicalDelimiter);
+  }
+  return "";
+}
+
 RawStringFormatStyleManager::RawStringFormatStyleManager(
 RawStringFormatStyleManager::RawStringFormatStyleManager(
     const FormatStyle &CodeStyle) {
     const FormatStyle &CodeStyle) {
   for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
   for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
@@ -1312,14 +1324,32 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
     const FormatToken &Current, LineState &State,
     const FormatToken &Current, LineState &State,
     const FormatStyle &RawStringStyle, bool DryRun) {
     const FormatStyle &RawStringStyle, bool DryRun) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
   unsigned StartColumn = State.Column - Current.ColumnWidth;
-  auto Delimiter = *getRawStringDelimiter(Current.TokenText);
+  StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText);
+  StringRef NewDelimiter =
+      getCanonicalRawStringDelimiter(Style, RawStringStyle.Language);
+  if (NewDelimiter.empty() || OldDelimiter.empty())
+    NewDelimiter = OldDelimiter;
   // The text of a raw string is between the leading 'R"delimiter(' and the
   // The text of a raw string is between the leading 'R"delimiter(' and the
   // trailing 'delimiter)"'.
   // trailing 'delimiter)"'.
-  unsigned PrefixSize = 3 + Delimiter.size();
-  unsigned SuffixSize = 2 + Delimiter.size();
+  unsigned OldPrefixSize = 3 + OldDelimiter.size();
+  unsigned OldSuffixSize = 2 + OldDelimiter.size();
+  // We create a virtual text environment which expects a null-terminated
+  // string, so we cannot use StringRef.
+  std::string RawText =
+      Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize);
+  if (NewDelimiter != OldDelimiter) {
+    // Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the
+    // raw string.
+    std::string CanonicalDelimiterSuffix = (")" + NewDelimiter + "\"").str();
+    if (StringRef(RawText).contains(CanonicalDelimiterSuffix))
+      NewDelimiter = OldDelimiter;
+  }
+
+  unsigned NewPrefixSize = 3 + NewDelimiter.size();
+  unsigned NewSuffixSize = 2 + NewDelimiter.size();
 
 
-  // The first start column is the column the raw text starts.
-  unsigned FirstStartColumn = StartColumn + PrefixSize;
+  // The first start column is the column the raw text starts after formatting.
+  unsigned FirstStartColumn = StartColumn + NewPrefixSize;
 
 
   // The next start column is the intended indentation a line break inside
   // The next start column is the intended indentation a line break inside
   // the raw string at level 0. It is determined by the following rules:
   // the raw string at level 0. It is determined by the following rules:
@@ -1330,7 +1360,7 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
   // These rules have the advantage that the formatted content both does not
   // These rules have the advantage that the formatted content both does not
   // violate the rectangle rule and visually flows within the surrounding
   // violate the rectangle rule and visually flows within the surrounding
   // source.
   // source.
-  bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n';
+  bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n';
   unsigned NextStartColumn = ContentStartsOnNewline
   unsigned NextStartColumn = ContentStartsOnNewline
                                  ? State.Stack.back().Indent + Style.IndentWidth
                                  ? State.Stack.back().Indent + Style.IndentWidth
                                  : FirstStartColumn;
                                  : FirstStartColumn;
@@ -1344,12 +1374,9 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
   //   - if the raw string prefix does not start on a newline, it is the current
   //   - if the raw string prefix does not start on a newline, it is the current
   //     indent.
   //     indent.
   unsigned LastStartColumn = Current.NewlinesBefore
   unsigned LastStartColumn = Current.NewlinesBefore
-                                 ? FirstStartColumn - PrefixSize
+                                 ? FirstStartColumn - NewPrefixSize
                                  : State.Stack.back().Indent;
                                  : State.Stack.back().Indent;
 
 
-  std::string RawText =
-      Current.TokenText.substr(PrefixSize).drop_back(SuffixSize);
-
   std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
   std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
       RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
       RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
       FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
       FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
@@ -1362,8 +1389,33 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
     return 0;
     return 0;
   }
   }
   if (!DryRun) {
   if (!DryRun) {
+    if (NewDelimiter != OldDelimiter) {
+      // In 'R"delimiter(...', the delimiter starts 2 characters after the start
+      // of the token.
+      SourceLocation PrefixDelimiterStart =
+          Current.Tok.getLocation().getLocWithOffset(2);
+      auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement(
+          SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter));
+      if (PrefixErr) {
+        llvm::errs()
+            << "Failed to update the prefix delimiter of a raw string: "
+            << llvm::toString(std::move(PrefixErr)) << "\n";
+      }
+      // In 'R"delimiter(...)delimiter"', the suffix delimiter starts at
+      // position length - 1 - |delimiter|.
+      SourceLocation SuffixDelimiterStart =
+          Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() -
+                                                     1 - OldDelimiter.size());
+      auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement(
+          SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter));
+      if (SuffixErr) {
+        llvm::errs()
+            << "Failed to update the suffix delimiter of a raw string: "
+            << llvm::toString(std::move(SuffixErr)) << "\n";
+      }
+    }
     SourceLocation OriginLoc =
     SourceLocation OriginLoc =
-        Current.Tok.getLocation().getLocWithOffset(PrefixSize);
+        Current.Tok.getLocation().getLocWithOffset(OldPrefixSize);
     for (const tooling::Replacement &Fix : Fixes.first) {
     for (const tooling::Replacement &Fix : Fixes.first) {
       auto Err = Whitespaces.addReplacement(tooling::Replacement(
       auto Err = Whitespaces.addReplacement(tooling::Replacement(
           SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
           SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
@@ -1376,7 +1428,7 @@ unsigned ContinuationIndenter::reformatRawStringLiteral(
   }
   }
   unsigned RawLastLineEndColumn = getLastLineEndColumn(
   unsigned RawLastLineEndColumn = getLastLineEndColumn(
       *NewCode, FirstStartColumn, Style.TabWidth, Encoding);
       *NewCode, FirstStartColumn, Style.TabWidth, Encoding);
-  State.Column = RawLastLineEndColumn + SuffixSize;
+  State.Column = RawLastLineEndColumn + NewSuffixSize;
   return Fixes.second;
   return Fixes.second;
 }
 }
 
 

+ 2 - 0
lib/Format/Format.cpp

@@ -459,6 +459,7 @@ template <> struct MappingTraits<FormatStyle::RawStringFormat> {
     IO.mapOptional("Language", Format.Language);
     IO.mapOptional("Language", Format.Language);
     IO.mapOptional("Delimiters", Format.Delimiters);
     IO.mapOptional("Delimiters", Format.Delimiters);
     IO.mapOptional("EnclosingFunctions", Format.EnclosingFunctions);
     IO.mapOptional("EnclosingFunctions", Format.EnclosingFunctions);
+    IO.mapOptional("CanonicalDelimiter", Format.CanonicalDelimiter);
     IO.mapOptional("BasedOnStyle", Format.BasedOnStyle);
     IO.mapOptional("BasedOnStyle", Format.BasedOnStyle);
   }
   }
 };
 };
@@ -713,6 +714,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
            "PARSE_TEXT_PROTO",
            "PARSE_TEXT_PROTO",
            "ParseTextProto",
            "ParseTextProto",
        },
        },
+      /*CanonicalDelimiter=*/"",
       /*BasedOnStyle=*/"google",
       /*BasedOnStyle=*/"google",
   }};
   }};
   GoogleStyle.SpacesBeforeTrailingComments = 2;
   GoogleStyle.SpacesBeforeTrailingComments = 2;

+ 5 - 2
unittests/Format/FormatTest.cpp

@@ -10429,13 +10429,15 @@ TEST_F(FormatTest, ParsesConfiguration) {
           FormatStyle::LK_TextProto,
           FormatStyle::LK_TextProto,
           {"pb", "proto"},
           {"pb", "proto"},
           {"PARSE_TEXT_PROTO"},
           {"PARSE_TEXT_PROTO"},
+          /*CanonicalDelimiter=*/"",
           "llvm",
           "llvm",
       },
       },
       {
       {
           FormatStyle::LK_Cpp,
           FormatStyle::LK_Cpp,
           {"cc", "cpp"},
           {"cc", "cpp"},
           {"C_CODEBLOCK", "CPPEVAL"},
           {"C_CODEBLOCK", "CPPEVAL"},
-          "",
+          /*CanonicalDelimiter=*/"cc",
+          /*BasedOnStyle=*/"",
       },
       },
   };
   };
 
 
@@ -10453,7 +10455,8 @@ TEST_F(FormatTest, ParsesConfiguration) {
               "      - 'cpp'\n"
               "      - 'cpp'\n"
               "    EnclosingFunctions:\n"
               "    EnclosingFunctions:\n"
               "      - 'C_CODEBLOCK'\n"
               "      - 'C_CODEBLOCK'\n"
-              "      - 'CPPEVAL'\n",
+              "      - 'CPPEVAL'\n"
+              "    CanonicalDelimiter: 'cc'",
               RawStringFormats, ExpectedRawStringFormats);
               RawStringFormats, ExpectedRawStringFormats);
 }
 }
 
 

+ 40 - 11
unittests/Format/FormatTestRawStrings.cpp

@@ -66,10 +66,13 @@ protected:
     FormatStyle Style = getLLVMStyle();
     FormatStyle Style = getLLVMStyle();
     Style.ColumnLimit = ColumnLimit;
     Style.ColumnLimit = ColumnLimit;
     Style.RawStringFormats = {
     Style.RawStringFormats = {
-        {/*Language=*/FormatStyle::LK_TextProto,
-         /*Delimiters=*/{"pb"},
-         /*EnclosingFunctions=*/{},
-         /*BasedOnStyle=*/"google"},
+        {
+            /*Language=*/FormatStyle::LK_TextProto,
+            /*Delimiters=*/{"pb"},
+            /*EnclosingFunctions=*/{},
+            /*CanonicalDelimiter=*/"",
+            /*BasedOnStyle=*/"google",
+        },
     };
     };
     return Style;
     return Style;
   }
   }
@@ -77,9 +80,13 @@ protected:
   FormatStyle getRawStringLLVMCppStyleBasedOn(std::string BasedOnStyle) {
   FormatStyle getRawStringLLVMCppStyleBasedOn(std::string BasedOnStyle) {
     FormatStyle Style = getLLVMStyle();
     FormatStyle Style = getLLVMStyle();
     Style.RawStringFormats = {
     Style.RawStringFormats = {
-        {/*Language=*/FormatStyle::LK_Cpp,
-         /*Delimiters=*/{"cpp"},
-         /*EnclosingFunctions=*/{}, BasedOnStyle},
+        {
+            /*Language=*/FormatStyle::LK_Cpp,
+            /*Delimiters=*/{"cpp"},
+            /*EnclosingFunctions=*/{},
+            /*CanonicalDelimiter=*/"",
+            BasedOnStyle,
+        },
     };
     };
     return Style;
     return Style;
   }
   }
@@ -87,9 +94,13 @@ protected:
   FormatStyle getRawStringGoogleCppStyleBasedOn(std::string BasedOnStyle) {
   FormatStyle getRawStringGoogleCppStyleBasedOn(std::string BasedOnStyle) {
     FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp);
     FormatStyle Style = getGoogleStyle(FormatStyle::LK_Cpp);
     Style.RawStringFormats = {
     Style.RawStringFormats = {
-        {/*Language=*/FormatStyle::LK_Cpp,
-         /*Delimiters=*/{"cpp"},
-         /*EnclosingFunctions=*/{}, BasedOnStyle},
+        {
+            /*Language=*/FormatStyle::LK_Cpp,
+            /*Delimiters=*/{"cpp"},
+            /*EnclosingFunctions=*/{},
+            /*CanonicalDelimiter=*/"",
+            BasedOnStyle,
+        },
     };
     };
     return Style;
     return Style;
   }
   }
@@ -131,7 +142,13 @@ TEST_F(FormatTestRawStrings, UsesConfigurationOverBaseStyle) {
   EXPECT_EQ(0, parseConfiguration("---\n"
   EXPECT_EQ(0, parseConfiguration("---\n"
                                   "Language: Cpp\n"
                                   "Language: Cpp\n"
                                   "BasedOnStyle: Google", &Style).value());
                                   "BasedOnStyle: Google", &Style).value());
-  Style.RawStringFormats = {{FormatStyle::LK_Cpp, {"cpp"}, {}, "llvm"}};
+  Style.RawStringFormats = {{
+      FormatStyle::LK_Cpp,
+      {"cpp"},
+      {},
+      /*CanonicalDelimiter=*/"",
+      /*BasedOnStyle=*/"llvm",
+  }};
   expect_eq(R"test(int* i = R"cpp(int* j = 0;)cpp";)test",
   expect_eq(R"test(int* i = R"cpp(int* j = 0;)cpp";)test",
             format(R"test(int * i = R"cpp(int * j = 0;)cpp";)test", Style));
             format(R"test(int * i = R"cpp(int * j = 0;)cpp";)test", Style));
 }
 }
@@ -752,6 +769,18 @@ a = ParseTextProto<ProtoType>(R"(key:value)");)test",
                    Style));
                    Style));
 }
 }
 
 
+TEST_F(FormatTestRawStrings, UpdatesToCanonicalDelimiters) {
+  FormatStyle Style = getRawStringPbStyleWithColumns(25);
+  Style.RawStringFormats[0].CanonicalDelimiter = "proto";
+  expect_eq(R"test(a = R"proto(key: value)proto";)test",
+            format(R"test(a = R"pb(key:value)pb";)test", Style));
+
+  // Keep the original delimiter when the content already contains the closing
+  // sequence of the canonical one; switching would end the raw string early.
+  expect_eq(R"test(a = R"pb(key: ")proto")pb";)test",
+            format(R"test(a = R"pb(key:")proto")pb";)test", Style));
+}
+
 } // end namespace
 } // end namespace
 } // end namespace format
 } // end namespace format
 } // end namespace clang
 } // end namespace clang