14 yıl önce · 2fa4e86b4f
--- a/include/clang/Basic/DiagnosticLexKinds.td
+++ b/include/clang/Basic/DiagnosticLexKinds.td
@@ -55,6 +55,15 @@ def err_unterminated___pragma : Error<"missing terminating ')' character">;
 
															 def err_conflict_marker : Error<"version control conflict marker in file">;
														
 
															+def err_raw_delim_too_long : Error<
														
 
															+  "raw string delimiter longer than 16 characters"
														
 
															+  "; use PREFIX( )PREFIX to delimit raw string">;
														
 
															+def err_invalid_char_raw_delim : Error<
														
 
															+  "invalid character '%0' in raw string delimiter"
														
 
															+  "; use PREFIX( )PREFIX to delimit raw string">;
														
 
															+def err_unterminated_raw_string : Error<
														
 
															+  "raw string missing terminating delimiter )%0\"">;
														
 
															+
														
 
															 def ext_multichar_character_literal : ExtWarn<
														
 
															   "multi-character character constant">, InGroup<MultiChar>;
														
 
															 def ext_four_char_character_literal : Extension<
														
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -485,6 +485,8 @@ private:
 
															   void LexNumericConstant    (Token &Result, const char *CurPtr);
														
 
															   void LexStringLiteral      (Token &Result, const char *CurPtr,
														
 
															                               tok::TokenKind Kind);
														
 
															+  void LexRawStringLiteral   (Token &Result, const char *CurPtr,
														
 
															+                              tok::TokenKind Kind);
														
 
															   void LexAngledStringLiteral(Token &Result, const char *CurPtr);
														
 
															   void LexCharConstant       (Token &Result, const char *CurPtr,
														
 
															                               tok::TokenKind Kind);
														
--- a/include/clang/Lex/LiteralSupport.h
+++ b/include/clang/Lex/LiteralSupport.h
@@ -197,6 +197,7 @@ public:
 
															 private:
														
 
															   void init(const Token *StringToks, unsigned NumStringToks);
														
 
															+  void CopyStringFragment(const StringRef &Fragment);
														
 
															 };
														
 
															 }  // end namespace clang
														
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -33,6 +33,7 @@
 
															 #include "llvm/Support/Compiler.h"
														
 
															 #include "llvm/Support/MemoryBuffer.h"
														
 
															 #include <cctype>
														
 
															+#include <cstring>
														
 
															 using namespace clang;
														
 
															 static void InitCharacterInfo();
														
@@ -760,7 +761,8 @@ enum {
 
															   CHAR_LETTER   = 0x04,  // a-z,A-Z
														
 
															   CHAR_NUMBER   = 0x08,  // 0-9
														
 
															   CHAR_UNDER    = 0x10,  // _
														
 
															-  CHAR_PERIOD   = 0x20   // .
														
 
															+  CHAR_PERIOD   = 0x20,  // .
														
 
															+  CHAR_RAWDEL   = 0x40   // {}[]#<>%:;?*+-/^&|~!=,"'
														
 
															 };
														
 
															 // Statically initialize CharInfo table based on ASCII character set
														
@@ -785,20 +787,20 @@ static const unsigned char CharInfo[256] =
 
															    0           , 0           , 0           , 0           ,
														
 
															 //32 SP         33  !         34  "         35  #
														
 
															 //36  $         37  %         38  &         39  '
														
 
															-   CHAR_HORZ_WS, 0           , 0           , 0           ,
														
 
															-   0           , 0           , 0           , 0           ,
														
 
															+   CHAR_HORZ_WS, CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
														
 
															+   0           , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
														
 
															 //40  (         41  )         42  *         43  +
														
 
															 //44  ,         45  -         46  .         47  /
														
 
															-   0           , 0           , 0           , 0           ,
														
 
															-   0           , 0           , CHAR_PERIOD , 0           ,
														
 
															+   0           , 0           , CHAR_RAWDEL , CHAR_RAWDEL ,
														
 
															+   CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
														
 
															 //48  0         49  1         50  2         51  3
														
 
															 //52  4         53  5         54  6         55  7
														
 
															    CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
														
 
															    CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER , CHAR_NUMBER ,
														
 
															 //56  8         57  9         58  :         59  ;
														
 
															 //60  <         61  =         62  >         63  ?
														
 
															-   CHAR_NUMBER , CHAR_NUMBER , 0           , 0           ,
														
 
															-   0           , 0           , 0           , 0           ,
														
 
															+   CHAR_NUMBER , CHAR_NUMBER , CHAR_RAWDEL , CHAR_RAWDEL ,
														
 
															+   CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
														
 
															 //64  @         65  A         66  B         67  C
														
 
															 //68  D         69  E         70  F         71  G
														
 
															    0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
														
@@ -813,8 +815,8 @@ static const unsigned char CharInfo[256] =
 
															    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
														
 
															 //88  X         89  Y         90  Z         91  [
														
 
															 //92  \         93  ]         94  ^         95  _
														
 
															-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
														
 
															-   0           , 0           , 0           , CHAR_UNDER  ,
														
 
															+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
														
 
															+   0           , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER  ,
														
 
															 //96  `         97  a         98  b         99  c
														
 
															 //100  d       101  e        102  f        103  g
														
 
															    0           , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
														
@@ -828,9 +830,9 @@ static const unsigned char CharInfo[256] =
 
															    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
														
 
															    CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_LETTER ,
														
 
															 //120  x       121  y        122  z        123  {
														
 
															-//124  |        125  }        126  ~        127 DEL
														
 
															-   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , 0           ,
														
 
															-   0           , 0           , 0           , 0
														
 
															+//124  |       125  }        126  ~        127 DEL
														
 
															+   CHAR_LETTER , CHAR_LETTER , CHAR_LETTER , CHAR_RAWDEL ,
														
 
															+   CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
														
 
															 };
														
 
															 static void InitCharacterInfo() {
														
@@ -888,6 +890,14 @@ static inline bool isNumberBody(unsigned char c) {
 
															     true : false;
														
 
															 }
														
 
															+/// isRawStringDelimBody - Return true if c may appear in a raw string
														
 
															+/// delimiter (letter, digit, '_', '.', or a CHAR_RAWDEL punctuator).
														
 
															+static inline bool isRawStringDelimBody(unsigned char c) {
														
 
															+  const unsigned DelimMask =
														
 
															+    CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL;
														
 
															+  return (CharInfo[c] & DelimMask) != 0;
														
 
															+}
														
 
															+
														
 
															 //===----------------------------------------------------------------------===//
														
 
															 // Diagnostics forwarding code.
														
@@ -1363,6 +1373,78 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
 
															   Result.setLiteralData(TokStart);
														
 
															 }
														
 
															+/// LexRawStringLiteral - Lex the rest of a raw string literal.  The caller has
														
 
															+/// already consumed the opening R", LR", u8R", uR", or UR"; on success a token
														
 
															+/// of kind Kind is formed, otherwise a tok::unknown token is formed.
														
 
															+void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
														
 
															+                                tok::TokenKind Kind) {
														
 
															+  // getAndAdvanceChar is deliberately not used here.  C++0x [lex.pptoken]p3:
														
 
															+  //  Between the initial and final double quote characters of the raw string,
														
 
															+  //  any transformations performed in phases 1 and 2 (trigraphs,
														
 
															+  //  universal-character-names, and line splicing) are reverted.
														
 
															+  // Measure the delimiter; scanning stops once 16 characters are accepted.
														
 
															+  unsigned DelimLen = 0;
														
 
															+  for (; DelimLen != 16; ++DelimLen)
														
 
															+    if (!isRawStringDelimBody(CurPtr[DelimLen]))
														
 
															+      break;
														
 
															+
														
 
															+  // Anything other than '(' right after the delimiter makes it invalid.
														
 
															+  if (CurPtr[DelimLen] != '(') {
														
 
															+    if (!isLexingRawMode()) {
														
 
															+      const char *DelimEnd = CurPtr + DelimLen;
														
 
															+      if (DelimLen == 16)
														
 
															+        Diag(DelimEnd, diag::err_raw_delim_too_long);
														
 
															+      else
														
 
															+        Diag(DelimEnd, diag::err_invalid_char_raw_delim)
														
 
															+          << StringRef(DelimEnd, 1);
														
 
															+    }
														
 
															+
														
 
															+    // Recover by skipping to the next '"' (or the end of the buffer).  That
														
 
															+    // quote may have been meant as raw string content, but there is no way
														
 
															+    // to tell.
														
 
															+    for (;;) {
														
 
															+      const char Ch = *CurPtr++;
														
 
															+      if (Ch == '"')
														
 
															+        break;
														
 
															+      if (Ch == 0 && CurPtr-1 == BufferEnd) {
														
 
															+        --CurPtr;  // Leave the terminating NUL unconsumed.
														
 
															+        break;
														
 
															+      }
														
 
															+    }
														
 
															+
														
 
															+    FormTokenWithChars(Result, CurPtr, tok::unknown);
														
 
															+    return;
														
 
															+  }
														
 
															+
														
 
															+  // Remember where the delimiter starts, then step over it and the '('.
														
 
															+  const char *Delim = CurPtr;
														
 
															+  CurPtr += DelimLen + 1;
														
 
															+
														
 
															+  for (;;) {
														
 
															+    const char Ch = *CurPtr++;
														
 
															+    if (Ch == 0 && CurPtr-1 == BufferEnd) {
														
 
															+      // Ran off the end of the buffer without seeing the closing delimiter.
														
 
															+      if (!isLexingRawMode())
														
 
															+        Diag(BufferPtr, diag::err_unterminated_raw_string)
														
 
															+          << StringRef(Delim, DelimLen);
														
 
															+      FormTokenWithChars(Result, CurPtr-1, tok::unknown);
														
 
															+      return;
														
 
															+    }
														
 
															+    // The literal ends only at ')' followed by the delimiter and a '"'.
														
 
															+    if (Ch != ')')
														
 
															+      continue;
														
 
															+    if (strncmp(CurPtr, Delim, DelimLen) == 0 && CurPtr[DelimLen] == '"') {
														
 
															+      CurPtr += DelimLen + 1;  // Step over the delimiter and the '"'.
														
 
															+      break;
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  // Capture the token start first: FormTokenWithChars updates BufferPtr.
														
 
															+  const char *TokStart = BufferPtr;
														
 
															+  FormTokenWithChars(Result, CurPtr, Kind);
														
 
															+  Result.setLiteralData(TokStart);
														
 
															+}
														
 
															+
														
 
															 /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
														
 
															 /// after having lexed the '<' character.  This is used for #include filenames.
														
 
															 void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
														
@@ -2262,12 +2344,36 @@ LexNextToken:
 
															         return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															                                tok::utf16_char_constant);
														
 
															-      // UTF-8 string literal
														
 
															-      if (Char == '8' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
														
 
															-        return LexStringLiteral(Result,
														
 
															-                              ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															-                                          SizeTmp2, Result),
														
 
															-                              tok::utf8_string_literal);
														
 
															+      // UTF-16 raw string literal
														
 
															+      if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
														
 
															+        return LexRawStringLiteral(Result,
														
 
															+                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															+                                           SizeTmp2, Result),
														
 
															+                               tok::utf16_string_literal);
														
 
															+
														
 
															+      if (Char == '8') {
														
 
															+        char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
														
 
															+
														
 
															+        // UTF-8 string literal
														
 
															+        if (Char2 == '"')
														
 
															+          return LexStringLiteral(Result,
														
 
															+                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															+                                           SizeTmp2, Result),
														
 
															+                               tok::utf8_string_literal);
														
 
															+
														
 
															+        if (Char2 == 'R') {
														
 
															+          unsigned SizeTmp3;
														
 
															+          char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
														
 
															+          // UTF-8 raw string literal
														
 
															+          if (Char3 == '"') {
														
 
															+            return LexRawStringLiteral(Result,
														
 
															+                   ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															+                                           SizeTmp2, Result),
														
 
															+                               SizeTmp3, Result),
														
 
															+                   tok::utf8_string_literal);
														
 
															+          }
														
 
															+        }
														
 
															+      }
														
 
															     }
														
 
															     // treat u like the start of an identifier.
														
@@ -2289,11 +2395,34 @@ LexNextToken:
 
															       if (Char == '\'')
														
 
															         return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															                                tok::utf32_char_constant);
														
 
															+
														
 
															+      // UTF-32 raw string literal
														
 
															+      if (Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
														
 
															+        return LexRawStringLiteral(Result,
														
 
															+                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															+                                           SizeTmp2, Result),
														
 
															+                               tok::utf32_string_literal);
														
 
															     }
														
 
															     // treat U like the start of an identifier.
														
 
															     return LexIdentifier(Result, CurPtr);
														
 
															+  case 'R': // Identifier or C++0x raw string literal
														
 
															+    // Notify MIOpt that we read a non-whitespace/non-comment token.
														
 
															+    MIOpt.ReadToken();
														
 
															+
														
 
															+    if (Features.CPlusPlus0x) {
														
 
															+      Char = getCharAndSize(CurPtr, SizeTmp);
														
 
															+
														
 
															+      if (Char == '"')
														
 
															+        return LexRawStringLiteral(Result,
														
 
															+                                   ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															+                                   tok::string_literal);
														
 
															+    }
														
 
															+
														
 
															+    // treat R like the start of an identifier.
														
 
															+    return LexIdentifier(Result, CurPtr);
														
 
															+
														
 
															   case 'L':   // Identifier (Loony) or wide literal (L'x' or L"xyz").
														
 
															     // Notify MIOpt that we read a non-whitespace/non-comment token.
														
 
															     MIOpt.ReadToken();
														
@@ -2304,6 +2433,14 @@ LexNextToken:
 
															       return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															                               tok::wide_string_literal);
														
 
															+    // Wide raw string literal.
														
 
															+    if (Features.CPlusPlus0x && Char == 'R' &&
														
 
															+        getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
														
 
															+      return LexRawStringLiteral(Result,
														
 
															+                               ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
														
 
															+                                           SizeTmp2, Result),
														
 
															+                               tok::wide_string_literal);
														
 
															+
														
 
															     // Wide character constant.
														
 
															     if (Char == '\'')
														
 
															       return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
														
@@ -2313,7 +2450,7 @@ LexNextToken:
 
															   // C99 6.4.2: Identifiers.
														
 
															   case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
														
 
															   case 'H': case 'I': case 'J': case 'K':    /*'L'*/case 'M': case 'N':
														
 
															-  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T':    /*'U'*/
														
 
															+  case 'O': case 'P': case 'Q':    /*'R'*/case 'S': case 'T':    /*'U'*/
														
 
															   case 'V': case 'W': case 'X': case 'Y': case 'Z':
														
 
															   case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
														
 
															   case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
														
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -713,6 +713,38 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
 
															 }
														
 
															+///       character-literal: [C++0x lex.ccon]
														
 
															+///         ' c-char-sequence '
														
 
															+///         u' c-char-sequence '
														
 
															+///         U' c-char-sequence '
														
 
															+///         L' c-char-sequence '
														
 
															+///       c-char-sequence:
														
 
															+///         c-char
														
 
															+///         c-char-sequence c-char
														
 
															+///       c-char:
														
 
															+///         any member of the source character set except the single-quote ',
														
 
															+///           backslash \, or new-line character
														
 
															+///         escape-sequence
														
 
															+///         universal-character-name
														
 
															+///       escape-sequence: [C++0x lex.ccon]
														
 
															+///         simple-escape-sequence
														
 
															+///         octal-escape-sequence
														
 
															+///         hexadecimal-escape-sequence
														
 
															+///       simple-escape-sequence:
														
 
															+///         one of \' \" \? \\ \a \b \f \n \r \t \v
														
 
															+///       octal-escape-sequence:
														
 
															+///         \ octal-digit
														
 
															+///         \ octal-digit octal-digit
														
 
															+///         \ octal-digit octal-digit octal-digit
														
 
															+///       hexadecimal-escape-sequence:
														
 
															+///         \x hexadecimal-digit
														
 
															+///         hexadecimal-escape-sequence hexadecimal-digit
														
 
															+///       universal-character-name:
														
 
															+///         \u hex-quad
														
 
															+///         \U hex-quad hex-quad
														
 
															+///       hex-quad:
														
 
															+///         hex-digit hex-digit hex-digit hex-digit
														
 
															+///
														
 
															 CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
														
 
															                                      SourceLocation Loc, Preprocessor &PP,
														
 
															                                      tok::TokenKind kind) {
														
@@ -825,34 +857,52 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
 
															 }
														
 
															-///       string-literal: [C99 6.4.5]
														
 
															-///          " [s-char-sequence] "
														
 
															-///         L" [s-char-sequence] "
														
 
															+///       string-literal: [C++0x lex.string]
														
 
															+///         encoding-prefix " [s-char-sequence] "
														
 
															+///         encoding-prefix R raw-string
														
 
															+///       encoding-prefix:
														
 
															+///         u8
														
 
															+///         u
														
 
															+///         U
														
 
															+///         L
														
 
															 ///       s-char-sequence:
														
 
															 ///         s-char
														
 
															 ///         s-char-sequence s-char
														
 
															 ///       s-char:
														
 
															-///         any source character except the double quote ",
														
 
															-///           backslash \, or newline character
														
 
															-///         escape-character
														
 
															-///         universal-character-name
														
 
															-///       escape-character: [C99 6.4.4.4]
														
 
															-///         \ escape-code
														
 
															+///         any member of the source character set except the double-quote ",
														
 
															+///           backslash \, or new-line character
														
 
															+///         escape-sequence
														
 
															 ///         universal-character-name
														
 
															-///       escape-code:
														
 
															-///         character-escape-code
														
 
															-///         octal-escape-code
														
 
															-///         hex-escape-code
														
 
															-///       character-escape-code: one of
														
 
															-///         n t b r f v a
														
 
															-///         \ ' " ?
														
 
															-///       octal-escape-code:
														
 
															-///         octal-digit
														
 
															-///         octal-digit octal-digit
														
 
															-///         octal-digit octal-digit octal-digit
														
 
															-///       hex-escape-code:
														
 
															-///         x hex-digit
														
 
															-///         hex-escape-code hex-digit
														
 
															+///       raw-string:
														
 
															+///         " d-char-sequence ( r-char-sequence ) d-char-sequence "
														
 
															+///       r-char-sequence:
														
 
															+///         r-char
														
 
															+///         r-char-sequence r-char
														
 
															+///       r-char:
														
 
															+///         any member of the source character set, except a right parenthesis )
														
 
															+///           followed by the initial d-char-sequence (which may be empty)
														
 
															+///           followed by a double quote ".
														
 
															+///       d-char-sequence:
														
 
															+///         d-char
														
 
															+///         d-char-sequence d-char
														
 
															+///       d-char:
														
 
															+///         any member of the basic source character set except:
														
 
															+///           space, the left parenthesis (, the right parenthesis ),
														
 
															+///           the backslash \, and the control characters representing horizontal
														
 
															+///           tab, vertical tab, form feed, and newline.
														
 
															+///       escape-sequence: [C++0x lex.ccon]
														
 
															+///         simple-escape-sequence
														
 
															+///         octal-escape-sequence
														
 
															+///         hexadecimal-escape-sequence
														
 
															+///       simple-escape-sequence:
														
 
															+///         one of \' \" \? \\ \a \b \f \n \r \t \v
														
 
															+///       octal-escape-sequence:
														
 
															+///         \ octal-digit
														
 
															+///         \ octal-digit octal-digit
														
 
															+///         \ octal-digit octal-digit octal-digit
														
 
															+///       hexadecimal-escape-sequence:
														
 
															+///         \x hexadecimal-digit
														
 
															+///         hexadecimal-escape-sequence hexadecimal-digit
														
 
															 ///       universal-character-name:
														
 
															 ///         \u hex-quad
														
 
															 ///         \U hex-quad hex-quad
														
@@ -972,64 +1022,69 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
 
															         ++ThisTokBuf;
														
 
															     }
														
 
															-    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
														
 
															-    ++ThisTokBuf;
														
 
															+    // Check for raw string
														
 
															+    if (ThisTokBuf[0] == 'R') {
														
 
															+      ThisTokBuf += 2; // skip R"
														
 
															-    // Check if this is a pascal string
														
 
															-    if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
														
 
															-        ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
														
 
															-
														
 
															-      // If the \p sequence is found in the first token, we have a pascal string
														
 
															-      // Otherwise, if we already have a pascal string, ignore the first \p
														
 
															-      if (i == 0) {
														
 
															+      const char *Prefix = ThisTokBuf;
														
 
															+      while (ThisTokBuf[0] != '(')
														
 
															         ++ThisTokBuf;
														
 
															-        Pascal = true;
														
 
															-      } else if (Pascal)
														
 
															-        ThisTokBuf += 2;
														
 
															-    }
														
 
															+      ++ThisTokBuf; // skip '('
														
 
															+
														
 
															+      // remove same number of characters from the end
														
 
															+      if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
														
 
															+        ThisTokEnd -= (ThisTokBuf - Prefix);
														
 
															+
														
 
															+      // Copy the string over
														
 
															+      CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf));
														
 
															+    } else {
														
 
															+      assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
														
 
															+      ++ThisTokBuf; // skip "
														
 
															+
														
 
															+      // Check if this is a pascal string
														
 
															+      if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
														
 
															+          ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
														
 
															-    while (ThisTokBuf != ThisTokEnd) {
														
 
															-      // Is this a span of non-escape characters?
														
 
															-      if (ThisTokBuf[0] != '\\') {
														
 
															-        const char *InStart = ThisTokBuf;
														
 
															-        do {
														
 
															+        // If the \p sequence is found in the first token, we have a pascal string
														
 
															+        // Otherwise, if we already have a pascal string, ignore the first \p
														
 
															+        if (i == 0) {
														
 
															           ++ThisTokBuf;
														
 
															-        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
														
 
															-
														
 
															-        // Copy the character span over.
														
 
															-        unsigned Len = ThisTokBuf-InStart;
														
 
															-        if (CharByteWidth == 1) {
														
 
															-          memcpy(ResultPtr, InStart, Len);
														
 
															-          ResultPtr += Len;
														
 
															-        } else {
														
 
															-          // Note: our internal rep of wide char tokens is always little-endian.
														
 
															-          for (; Len; --Len, ++InStart) {
														
 
															-            *ResultPtr++ = InStart[0];
														
 
															-            // Add zeros at the end.
														
 
															-            for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
														
 
															-              *ResultPtr++ = 0;
														
 
															-          }
														
 
															-        }
														
 
															-        continue;
														
 
															+          Pascal = true;
														
 
															+        } else if (Pascal)
														
 
															+          ThisTokBuf += 2;
														
 
															       }
														
 
															-      // Is this a Universal Character Name escape?
														
 
															-      if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
														
 
															-        EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
														
 
															-                        hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
														
 
															-                        CharByteWidth, Diags, Features);
														
 
															-        continue;
														
 
															-      }
														
 
															-      // Otherwise, this is a non-UCN escape character.  Process it.
														
 
															-      unsigned ResultChar =
														
 
															-        ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
														
 
															-                          FullSourceLoc(StringToks[i].getLocation(), SM),
														
 
															-                          CharByteWidth*8, Diags);
														
 
															-      // Note: our internal rep of wide char tokens is always little-endian.
														
 
															-      *ResultPtr++ = ResultChar & 0xFF;
														
 
															+      while (ThisTokBuf != ThisTokEnd) {
														
 
															+        // Is this a span of non-escape characters?
														
 
															+        if (ThisTokBuf[0] != '\\') {
														
 
															+          const char *InStart = ThisTokBuf;
														
 
															+          do {
														
 
															+            ++ThisTokBuf;
														
 
															+          } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
														
 
															+
														
 
															+          // Copy the character span over.
														
 
															+          CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart));
														
 
															+          continue;
														
 
															+        }
														
 
															+        // Is this a Universal Character Name escape?
														
 
															+        if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
														
 
															+          EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
														
 
															+                          hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
														
 
															+                          CharByteWidth, Diags, Features);
														
 
															+          continue;
														
 
															+        }
														
 
															+        // Otherwise, this is a non-UCN escape character.  Process it.
														
 
															+        unsigned ResultChar =
														
 
															+          ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
														
 
															+                            FullSourceLoc(StringToks[i].getLocation(), SM),
														
 
															+                            CharByteWidth*8, Diags);
														
 
															+
														
 
															+        // Note: our internal rep of wide char tokens is always little-endian.
														
 
															+        *ResultPtr++ = ResultChar & 0xFF;
														
 
															-      for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
														
 
															-        *ResultPtr++ = ResultChar >> i*8;
														
 
															+        for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
														
 
															+          *ResultPtr++ = ResultChar >> i*8;
														
 
															+      }
														
 
															     }
														
 
															   }
														
@@ -1062,6 +1117,25 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
 
															 }
														
 
															+/// CopyStringFragment - Append the bytes of Fragment at ResultPtr, padding
														
 
															+/// each one with zero bytes up to CharByteWidth, and advance ResultPtr.
														
 
															+void StringLiteralParser::CopyStringFragment(const StringRef &Fragment) {
														
 
															+  // Single-byte characters can be copied over as one span.
														
 
															+  if (CharByteWidth == 1) {
														
 
															+    memcpy(ResultPtr, Fragment.data(), Fragment.size());
														
 
															+    ResultPtr += Fragment.size();
														
 
															+  } else {
														
 
															+    // Note: our internal rep of wide char tokens is always little-endian.
														
 
															+    for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) {
														
 
															+      *ResultPtr++ = *I;
														
 
															+      // Fill the remaining CharByteWidth-1 bytes of this character with 0.
														
 
															+      for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
														
 
															+        *ResultPtr++ = 0;
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+
														
 
															 /// getOffsetOfStringByte - This function returns the offset of the
														
 
															 /// specified byte of the string data represented by Token.  This handles
														
 
															 /// advancing over escape sequences in the string.
														
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -17,39 +17,53 @@
 
															 using namespace clang;
														
 
															+/// IsStringPrefix - Return true if non-empty Str is exactly a string prefix:
														
 
															+/// 'L', or with CPlusPlus0x 'u', 'U', 'u8', 'R', 'LR', 'uR', 'UR', 'u8R'.
														
 
															+static bool IsStringPrefix(const StringRef &Str, bool CPlusPlus0x) {
														
 
															+
														
 
															+  if (Str[0] == 'L' ||
														
 
															+      (CPlusPlus0x && (Str[0] == 'u' || Str[0] == 'U' || Str[0] == 'R'))) {
														
 
															+
														
 
															+    if (Str.size() == 1)
														
 
															+      return true; // "L", "u", "U", and "R"
														
 
															+
														
 
															+    // Check for raw flavors. Need to make sure the first character wasn't
														
 
															+    // already R. "LR" needs the CPlusPlus0x check since 'L' skips it above.
														
 
															+    if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus0x)
														
 
															+      return true; // "LR", "uR", "UR"
														
 
															+
														
 
															+    // Check for "u8" and "u8R"
														
 
															+    if (Str[0] == 'u' && Str[1] == '8') {
														
 
															+      if (Str.size() == 2) return true; // "u8"
														
 
															+      if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R"
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  return false;
														
 
															+}
														
 
															+
														
 
															 /// IsIdentifierStringPrefix - Return true if the spelling of the token
														
 
															-/// is literally 'L', 'u', 'U', or 'u8'.
														
 
															+/// is literally 'L', 'u', 'U', 'u8', 'R', 'LR', 'uR', 'UR', or 'u8R'.
														
 
															 bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
														
 
															   const LangOptions &LangOpts = PP.getLangOptions();
														
 
															   if (!Tok.needsCleaning()) {
														
 
															-    if (Tok.getLength() != 1 && Tok.getLength() != 2)
														
 
															+    if (Tok.getLength() < 1 || Tok.getLength() > 3)
														
 
															       return false;
														
 
															     SourceManager &SM = PP.getSourceManager();
														
 
															     const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
														
 
															-    if (Tok.getLength() == 1)
														
 
															-      return Ptr[0] == 'L' ||
														
 
															-             (LangOpts.CPlusPlus0x && (Ptr[0] == 'u' || Ptr[0] == 'U'));
														
 
															-    if (Tok.getLength() == 2)
														
 
															-      return LangOpts.CPlusPlus0x && Ptr[0] == 'u' && Ptr[1] == '8';
														
 
															+    return IsStringPrefix(StringRef(Ptr, Tok.getLength()),
														
 
															+                          LangOpts.CPlusPlus0x);
														
 
															   }
														
 
															   if (Tok.getLength() < 256) {
														
 
															     char Buffer[256];
														
 
															     const char *TokPtr = Buffer;
														
 
															     unsigned length = PP.getSpelling(Tok, TokPtr);
														
 
															-    if (length == 1)
														
 
															-      return TokPtr[0] == 'L' ||
														
 
															-             (LangOpts.CPlusPlus0x && (TokPtr[0] == 'u' || TokPtr[0] == 'U'));
														
 
															-    if (length == 2)
														
 
															-      return LangOpts.CPlusPlus0x && TokPtr[0] == 'u' && TokPtr[1] == '8';
														
 
															-    return false;
														
 
															+    return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus0x);
														
 
															   }
														
 
															-  std::string TokStr = PP.getSpelling(Tok);
														
 
															-  return TokStr == "L" || (LangOpts.CPlusPlus0x && (TokStr == "u8" ||
														
 
															-                                                    TokStr == "u" ||
														
 
															-                                                    TokStr == "U"));
														
 
															+  return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus0x);
														
 
															 }
														
 
															 TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
														
--- a/test/CodeGen/string-literal.c
+++ b/test/CodeGen/string-literal.c
@@ -1,6 +1,6 @@
 
															 // RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
														
 
															 // RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
														
 
															-// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=C %s
														
 
															+// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -check-prefix=CPP0X %s
														
 
															 #include <stddef.h>
														
@@ -38,5 +38,28 @@ int main() {
 
															   // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"def\00", align 1
														
 
															   const char *g = u8"def";
														
 
															+
														
 
															+  // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"ghi\00", align 1
														
 
															+  const char *h = R"foo(ghi)foo";
														
 
															+
														
 
															+  // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"jkl\00", align 1
														
 
															+  const char *i = u8R"bar(jkl)bar";
														
 
															+
														
 
															+  // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"G\00H\00\00\00", align 2
														
 
															+  const char16_t *j = uR"foo(GH)foo";
														
 
															+
														
 
															+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"I\00\00\00J\00\00\00\00\00\00\00", align 4
														
 
															+  const char32_t *k = UR"bar(IJ)bar";
														
 
															+
														
 
															+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] c"K\00\00\00L\00\00\00\00\00\00\00", align 4
														
 
															+  const wchar_t *l = LR"bar(KL)bar";
														
 
															+
														
 
															+  // CHECK-CPP0X: private unnamed_addr constant [9 x i8] c"abc\5Cndef\00", align 1
														
 
															+  const char *m = R"(abc\ndef)";
														
 
															+
														
 
															+  // CHECK-CPP0X: private unnamed_addr constant [8 x i8] c"abc\0Adef\00", align 1
														
 
															+  const char *n = R"(abc
														
 
															+def)";
														
 
															+
														
 
															 #endif
														
 
															 }
														
--- a/test/Lexer/cxx0x_raw_string_delim_length.cpp
+++ b/test/Lexer/cxx0x_raw_string_delim_length.cpp
@@ -0,0 +1,6 @@
 
															+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
														
 
															+
														
 
															+const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
														
 
															+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string delimiter longer than 16 characters'
														
 
															+
														
 
															+const char *str = R"abcdefghijkmnopqrstuvwxyz(abcdef)abcdefghijkmnopqrstuvwxyz";
														
--- a/test/Lexer/cxx0x_raw_string_unterminated.cpp
+++ b/test/Lexer/cxx0x_raw_string_unterminated.cpp
@@ -0,0 +1,8 @@
 
															+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
														
 
															+
														
 
															+const char *str = R"foo(abc
														
 
															+def)bar";
														
 
															+// RUN: %clang_cc1 -std=c++0x -E %s 2>&1 | grep 'error: raw string missing terminating delimiter )foo"'
														
 
															+
														
 
															+const char *str = R"foo(abc
														
 
															+def)bar";
														
--- a/test/SemaCXX/cxx0x-type-convert-construct.cpp
+++ b/test/SemaCXX/cxx0x-type-convert-construct.cpp
@@ -7,4 +7,15 @@ void f() {
 
															   ustr = u"a UTF-16 string"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [16]'}}
														
 
															   char32_t *Ustr;
														
 
															   Ustr = U"a UTF-32 string"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [16]'}}
														
 
															+
														
 
															+  char *Rstr;
														
 
															+  Rstr = "a raw string"; // expected-warning{{conversion from string literal to 'char *' is deprecated}}
														
 
															+  wchar_t *LRstr;
														
 
															+  LRstr = LR"foo(a wide raw string)foo"; // expected-warning{{conversion from string literal to 'wchar_t *' is deprecated}}
														
 
															+  char *u8Rstr;
														
 
															+  u8Rstr = u8R"foo(a UTF-8 raw string)foo"; // expected-error {{assigning to 'char *' from incompatible type 'const char [19]'}}
														
 
															+  char16_t *uRstr;
														
 
															+  uRstr = uR"foo(a UTF-16 raw string)foo"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [20]'}}
														
 
															+  char32_t *URstr;
														
 
															+  URstr = UR"foo(a UTF-32 raw string)foo"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [20]'}}
														
 
															 }