LiteralSupport.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. //===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file was developed by Steve Naroff and is distributed under
  6. // the University of Illinois Open Source License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the NumericLiteralParser, CharLiteralParser, and
  11. // StringLiteralParser interfaces.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "clang/Lex/LiteralSupport.h"
  15. #include "clang/Lex/Preprocessor.h"
  16. #include "clang/Basic/TargetInfo.h"
  17. #include "clang/Basic/Diagnostic.h"
  18. #include "llvm/ADT/APInt.h"
  19. #include "llvm/ADT/StringExtras.h"
  20. using namespace clang;
  /// HexDigitValue - Map a hexadecimal digit character ('0'-'9', 'a'-'f' or
  /// 'A'-'F') to its numeric value in the range 0-15.  Any other character
  /// yields -1.
  static int HexDigitValue(char C) {
    int Value = -1;
    if (C >= '0' && C <= '9')
      Value = C - '0';
    else if (C >= 'a' && C <= 'f')
      Value = 10 + (C - 'a');
    else if (C >= 'A' && C <= 'F')
      Value = 10 + (C - 'A');
    return Value;
  }
  29. /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
  30. /// either a character or a string literal.
  31. static unsigned ProcessCharEscape(const char *&ThisTokBuf,
  32. const char *ThisTokEnd, bool &HadError,
  33. SourceLocation Loc, bool IsWide,
  34. Preprocessor &PP) {
  35. // Skip the '\' char.
  36. ++ThisTokBuf;
  37. // We know that this character can't be off the end of the buffer, because
  38. // that would have been \", which would not have been the end of string.
  39. unsigned ResultChar = *ThisTokBuf++;
  40. switch (ResultChar) {
  41. // These map to themselves.
  42. case '\\': case '\'': case '"': case '?': break;
  43. // These have fixed mappings.
  44. case 'a':
  45. // TODO: K&R: the meaning of '\\a' is different in traditional C
  46. ResultChar = 7;
  47. break;
  48. case 'b':
  49. ResultChar = 8;
  50. break;
  51. case 'e':
  52. PP.Diag(Loc, diag::ext_nonstandard_escape, "e");
  53. ResultChar = 27;
  54. break;
  55. case 'f':
  56. ResultChar = 12;
  57. break;
  58. case 'n':
  59. ResultChar = 10;
  60. break;
  61. case 'r':
  62. ResultChar = 13;
  63. break;
  64. case 't':
  65. ResultChar = 9;
  66. break;
  67. case 'v':
  68. ResultChar = 11;
  69. break;
  70. //case 'u': case 'U': // FIXME: UCNs.
  71. case 'x': { // Hex escape.
  72. ResultChar = 0;
  73. if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
  74. PP.Diag(Loc, diag::err_hex_escape_no_digits);
  75. HadError = 1;
  76. break;
  77. }
  78. // Hex escapes are a maximal series of hex digits.
  79. bool Overflow = false;
  80. for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
  81. int CharVal = HexDigitValue(ThisTokBuf[0]);
  82. if (CharVal == -1) break;
  83. Overflow |= ResultChar & 0xF0000000; // About to shift out a digit?
  84. ResultChar <<= 4;
  85. ResultChar |= CharVal;
  86. }
  87. // See if any bits will be truncated when evaluated as a character.
  88. unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc)
  89. : PP.getTargetInfo().getCharWidth(Loc);
  90. if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
  91. Overflow = true;
  92. ResultChar &= ~0U >> (32-CharWidth);
  93. }
  94. // Check for overflow.
  95. if (Overflow) // Too many digits to fit in
  96. PP.Diag(Loc, diag::warn_hex_escape_too_large);
  97. break;
  98. }
  99. case '0': case '1': case '2': case '3':
  100. case '4': case '5': case '6': case '7': {
  101. // Octal escapes.
  102. --ThisTokBuf;
  103. ResultChar = 0;
  104. // Octal escapes are a series of octal digits with maximum length 3.
  105. // "\0123" is a two digit sequence equal to "\012" "3".
  106. unsigned NumDigits = 0;
  107. do {
  108. ResultChar <<= 3;
  109. ResultChar |= *ThisTokBuf++ - '0';
  110. ++NumDigits;
  111. } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
  112. ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
  113. // Check for overflow. Reject '\777', but not L'\777'.
  114. unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc)
  115. : PP.getTargetInfo().getCharWidth(Loc);
  116. if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
  117. PP.Diag(Loc, diag::warn_octal_escape_too_large);
  118. ResultChar &= ~0U >> (32-CharWidth);
  119. }
  120. break;
  121. }
  122. // Otherwise, these are not valid escapes.
  123. case '(': case '{': case '[': case '%':
  124. // GCC accepts these as extensions. We warn about them as such though.
  125. if (!PP.getLangOptions().NoExtensions) {
  126. PP.Diag(Loc, diag::ext_nonstandard_escape,
  127. std::string()+(char)ResultChar);
  128. break;
  129. }
  130. // FALL THROUGH.
  131. default:
  132. if (isgraph(ThisTokBuf[0])) {
  133. PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar);
  134. } else {
  135. PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar));
  136. }
  137. break;
  138. }
  139. return ResultChar;
  140. }
  141. /// integer-constant: [C99 6.4.4.1]
  142. /// decimal-constant integer-suffix
  143. /// octal-constant integer-suffix
  144. /// hexadecimal-constant integer-suffix
  145. /// decimal-constant:
  146. /// nonzero-digit
  147. /// decimal-constant digit
  148. /// octal-constant:
  149. /// 0
  150. /// octal-constant octal-digit
  151. /// hexadecimal-constant:
  152. /// hexadecimal-prefix hexadecimal-digit
  153. /// hexadecimal-constant hexadecimal-digit
  154. /// hexadecimal-prefix: one of
  155. /// 0x 0X
  156. /// integer-suffix:
  157. /// unsigned-suffix [long-suffix]
  158. /// unsigned-suffix [long-long-suffix]
  159. /// long-suffix [unsigned-suffix]
  160. /// long-long-suffix [unsigned-sufix]
  161. /// nonzero-digit:
  162. /// 1 2 3 4 5 6 7 8 9
  163. /// octal-digit:
  164. /// 0 1 2 3 4 5 6 7
  165. /// hexadecimal-digit:
  166. /// 0 1 2 3 4 5 6 7 8 9
  167. /// a b c d e f
  168. /// A B C D E F
  169. /// unsigned-suffix: one of
  170. /// u U
  171. /// long-suffix: one of
  172. /// l L
  173. /// long-long-suffix: one of
  174. /// ll LL
  175. ///
  176. /// floating-constant: [C99 6.4.4.2]
  177. /// TODO: add rules...
  178. ///
  179. NumericLiteralParser::
  180. NumericLiteralParser(const char *begin, const char *end,
  181. SourceLocation TokLoc, Preprocessor &pp)
  182. : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
  183. s = DigitsBegin = begin;
  184. saw_exponent = false;
  185. saw_period = false;
  186. saw_float_suffix = false;
  187. isLong = false;
  188. isUnsigned = false;
  189. isLongLong = false;
  190. hadError = false;
  191. if (*s == '0') { // parse radix
  192. s++;
  193. if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
  194. s++;
  195. radix = 16;
  196. DigitsBegin = s;
  197. s = SkipHexDigits(s);
  198. if (s == ThisTokEnd) {
  199. // Done.
  200. } else if (*s == '.') {
  201. s++;
  202. saw_period = true;
  203. s = SkipHexDigits(s);
  204. }
  205. // A binary exponent can appear with or with a '.'. If dotted, the
  206. // binary exponent is required.
  207. if (*s == 'p' || *s == 'P') {
  208. s++;
  209. saw_exponent = true;
  210. if (*s == '+' || *s == '-') s++; // sign
  211. const char *first_non_digit = SkipDigits(s);
  212. if (first_non_digit == s) {
  213. Diag(TokLoc, diag::err_exponent_has_no_digits);
  214. return;
  215. } else {
  216. s = first_non_digit;
  217. }
  218. } else if (saw_period) {
  219. Diag(TokLoc, diag::err_hexconstant_requires_exponent);
  220. return;
  221. }
  222. } else if (*s == 'b' || *s == 'B') {
  223. // 0b101010 is a GCC extension.
  224. ++s;
  225. radix = 2;
  226. DigitsBegin = s;
  227. s = SkipBinaryDigits(s);
  228. if (s == ThisTokEnd) {
  229. // Done.
  230. } else if (isxdigit(*s)) {
  231. Diag(TokLoc, diag::err_invalid_binary_digit, std::string(s, s+1));
  232. return;
  233. }
  234. PP.Diag(TokLoc, diag::ext_binary_literal);
  235. } else {
  236. // For now, the radix is set to 8. If we discover that we have a
  237. // floating point constant, the radix will change to 10. Octal floating
  238. // point constants are not permitted (only decimal and hexadecimal).
  239. radix = 8;
  240. DigitsBegin = s;
  241. s = SkipOctalDigits(s);
  242. if (s == ThisTokEnd) {
  243. // Done.
  244. } else if (isxdigit(*s)) {
  245. Diag(TokLoc, diag::err_invalid_octal_digit, std::string(s, s+1));
  246. return;
  247. } else if (*s == '.') {
  248. s++;
  249. radix = 10;
  250. saw_period = true;
  251. s = SkipDigits(s);
  252. }
  253. if (*s == 'e' || *s == 'E') { // exponent
  254. s++;
  255. radix = 10;
  256. saw_exponent = true;
  257. if (*s == '+' || *s == '-') s++; // sign
  258. const char *first_non_digit = SkipDigits(s);
  259. if (first_non_digit == s) {
  260. Diag(TokLoc, diag::err_exponent_has_no_digits);
  261. return;
  262. } else {
  263. s = first_non_digit;
  264. }
  265. }
  266. }
  267. } else { // the first digit is non-zero
  268. radix = 10;
  269. s = SkipDigits(s);
  270. if (s == ThisTokEnd) {
  271. // Done.
  272. } else if (isxdigit(*s)) {
  273. Diag(TokLoc, diag::err_invalid_decimal_digit, std::string(s, s+1));
  274. return;
  275. } else if (*s == '.') {
  276. s++;
  277. saw_period = true;
  278. s = SkipDigits(s);
  279. }
  280. if (*s == 'e' || *s == 'E') { // exponent
  281. s++;
  282. saw_exponent = true;
  283. if (*s == '+' || *s == '-') s++; // sign
  284. const char *first_non_digit = SkipDigits(s);
  285. if (first_non_digit == s) {
  286. Diag(TokLoc, diag::err_exponent_has_no_digits);
  287. return;
  288. } else {
  289. s = first_non_digit;
  290. }
  291. }
  292. }
  293. SuffixBegin = s;
  294. if (saw_period || saw_exponent) {
  295. if (s < ThisTokEnd) { // parse size suffix (float, long double)
  296. if (*s == 'f' || *s == 'F') {
  297. saw_float_suffix = true;
  298. s++;
  299. } else if (*s == 'l' || *s == 'L') {
  300. isLong = true;
  301. s++;
  302. }
  303. if (s != ThisTokEnd) {
  304. Diag(TokLoc, diag::err_invalid_suffix_float_constant,
  305. std::string(SuffixBegin, ThisTokEnd));
  306. return;
  307. }
  308. }
  309. } else {
  310. if (s < ThisTokEnd) {
  311. // parse int suffix - they can appear in any order ("ul", "lu", "llu").
  312. if (*s == 'u' || *s == 'U') {
  313. s++;
  314. isUnsigned = true; // unsigned
  315. if ((s < ThisTokEnd) && (*s == 'l' || *s == 'L')) {
  316. s++;
  317. // handle "long long" type - l's need to be adjacent and same case.
  318. if ((s < ThisTokEnd) && (*s == *(s-1))) {
  319. isLongLong = true; // unsigned long long
  320. s++;
  321. } else {
  322. isLong = true; // unsigned long
  323. }
  324. }
  325. } else if (*s == 'l' || *s == 'L') {
  326. s++;
  327. // handle "long long" types - l's need to be adjacent and same case.
  328. if ((s < ThisTokEnd) && (*s == *(s-1))) {
  329. s++;
  330. if ((s < ThisTokEnd) && (*s == 'u' || *s == 'U')) {
  331. isUnsigned = true; // unsigned long long
  332. s++;
  333. } else {
  334. isLongLong = true; // long long
  335. }
  336. } else { // handle "long" types
  337. if ((s < ThisTokEnd) && (*s == 'u' || *s == 'U')) {
  338. isUnsigned = true; // unsigned long
  339. s++;
  340. } else {
  341. isLong = true; // long
  342. }
  343. }
  344. }
  345. if (s != ThisTokEnd) {
  346. Diag(TokLoc, diag::err_invalid_suffix_integer_constant,
  347. std::string(SuffixBegin, ThisTokEnd));
  348. return;
  349. }
  350. }
  351. }
  352. }
  353. /// GetIntegerValue - Convert this numeric literal value to an APInt that
  354. /// matches Val's input width. If there is an overflow, set Val to the low bits
  355. /// of the result and return true. Otherwise, return false.
  356. bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
  357. Val = 0;
  358. s = DigitsBegin;
  359. llvm::APInt RadixVal(Val.getBitWidth(), radix);
  360. llvm::APInt CharVal(Val.getBitWidth(), 0);
  361. llvm::APInt OldVal = Val;
  362. bool OverflowOccurred = false;
  363. while (s < SuffixBegin) {
  364. unsigned C = HexDigitValue(*s++);
  365. // If this letter is out of bound for this radix, reject it.
  366. assert(C < radix && "NumericLiteralParser ctor should have rejected this");
  367. CharVal = C;
  368. // Add the digit to the value in the appropriate radix. If adding in digits
  369. // made the value smaller, then this overflowed.
  370. OldVal = Val;
  371. // Multiply by radix, did overflow occur on the multiply?
  372. Val *= RadixVal;
  373. OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
  374. OldVal = Val;
  375. // Add value, did overflow occur on the value?
  376. Val += CharVal;
  377. OverflowOccurred |= Val.ult(OldVal);
  378. OverflowOccurred |= Val.ult(CharVal);
  379. }
  380. return OverflowOccurred;
  381. }
  382. // GetFloatValue - Poor man's floatvalue (FIXME).
  383. float NumericLiteralParser::GetFloatValue() {
  384. char floatChars[256];
  385. strncpy(floatChars, ThisTokBegin, ThisTokEnd-ThisTokBegin);
  386. floatChars[ThisTokEnd-ThisTokBegin] = '\0';
  387. return strtof(floatChars, 0);
  388. }
  389. void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
  390. const std::string &M) {
  391. PP.Diag(Loc, DiagID, M);
  392. hadError = true;
  393. }
  394. CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
  395. SourceLocation Loc, Preprocessor &PP) {
  396. // At this point we know that the character matches the regex "L?'.*'".
  397. HadError = false;
  398. Value = 0;
  399. // Determine if this is a wide character.
  400. IsWide = begin[0] == 'L';
  401. if (IsWide) ++begin;
  402. // Skip over the entry quote.
  403. assert(begin[0] == '\'' && "Invalid token lexed");
  404. ++begin;
  405. // FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the
  406. // size of "value".
  407. assert(PP.getTargetInfo().getIntWidth(Loc) == 32 &&
  408. "Assumes sizeof(int) == 4 for now");
  409. // FIXME: This assumes that wchar_t is 32-bits for now.
  410. assert(PP.getTargetInfo().getWCharWidth(Loc) == 32 &&
  411. "Assumes sizeof(wchar_t) == 4 for now");
  412. // FIXME: This extensively assumes that 'char' is 8-bits.
  413. assert(PP.getTargetInfo().getCharWidth(Loc) == 8 &&
  414. "Assumes char is 8 bits");
  415. bool isFirstChar = true;
  416. bool isMultiChar = false;
  417. while (begin[0] != '\'') {
  418. unsigned ResultChar;
  419. if (begin[0] != '\\') // If this is a normal character, consume it.
  420. ResultChar = *begin++;
  421. else // Otherwise, this is an escape character.
  422. ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
  423. // If this is a multi-character constant (e.g. 'abc'), handle it. These are
  424. // implementation defined (C99 6.4.4.4p10).
  425. if (!isFirstChar) {
  426. // If this is the second character being processed, do special handling.
  427. if (!isMultiChar) {
  428. isMultiChar = true;
  429. // Warn about discarding the top bits for multi-char wide-character
  430. // constants (L'abcd').
  431. if (IsWide)
  432. PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
  433. }
  434. if (IsWide) {
  435. // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
  436. Value = 0;
  437. } else {
  438. // Narrow character literals act as though their value is concatenated
  439. // in this implementation.
  440. if (((Value << 8) >> 8) != Value)
  441. PP.Diag(Loc, diag::warn_char_constant_too_large);
  442. Value <<= 8;
  443. }
  444. }
  445. Value += ResultChar;
  446. isFirstChar = false;
  447. }
  448. // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
  449. // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
  450. // character constants are not sign extended in the this implementation:
  451. // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
  452. if (!IsWide && !isMultiChar && (Value & 128) &&
  453. PP.getTargetInfo().isCharSigned(Loc))
  454. Value = (signed char)Value;
  455. }
  456. /// string-literal: [C99 6.4.5]
  457. /// " [s-char-sequence] "
  458. /// L" [s-char-sequence] "
  459. /// s-char-sequence:
  460. /// s-char
  461. /// s-char-sequence s-char
  462. /// s-char:
  463. /// any source character except the double quote ",
  464. /// backslash \, or newline character
  465. /// escape-character
  466. /// universal-character-name
  467. /// escape-character: [C99 6.4.4.4]
  468. /// \ escape-code
  469. /// universal-character-name
  470. /// escape-code:
  471. /// character-escape-code
  472. /// octal-escape-code
  473. /// hex-escape-code
  474. /// character-escape-code: one of
  475. /// n t b r f v a
  476. /// \ ' " ?
  477. /// octal-escape-code:
  478. /// octal-digit
  479. /// octal-digit octal-digit
  480. /// octal-digit octal-digit octal-digit
  481. /// hex-escape-code:
  482. /// x hex-digit
  483. /// hex-escape-code hex-digit
  484. /// universal-character-name:
  485. /// \u hex-quad
  486. /// \U hex-quad hex-quad
  487. /// hex-quad:
  488. /// hex-digit hex-digit hex-digit hex-digit
  489. ///
  490. StringLiteralParser::
  491. StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks,
  492. Preprocessor &pp, TargetInfo &t)
  493. : PP(pp), Target(t) {
  494. // Scan all of the string portions, remember the max individual token length,
  495. // computing a bound on the concatenated string length, and see whether any
  496. // piece is a wide-string. If any of the string portions is a wide-string
  497. // literal, the result is a wide-string literal [C99 6.4.5p4].
  498. MaxTokenLength = StringToks[0].getLength();
  499. SizeBound = StringToks[0].getLength()-2; // -2 for "".
  500. AnyWide = StringToks[0].getKind() == tok::wide_string_literal;
  501. hadError = false;
  502. // Implement Translation Phase #6: concatenation of string literals
  503. /// (C99 5.1.1.2p1). The common case is only one string fragment.
  504. for (unsigned i = 1; i != NumStringToks; ++i) {
  505. // The string could be shorter than this if it needs cleaning, but this is a
  506. // reasonable bound, which is all we need.
  507. SizeBound += StringToks[i].getLength()-2; // -2 for "".
  508. // Remember maximum string piece length.
  509. if (StringToks[i].getLength() > MaxTokenLength)
  510. MaxTokenLength = StringToks[i].getLength();
  511. // Remember if we see any wide strings.
  512. AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
  513. }
  514. // Include space for the null terminator.
  515. ++SizeBound;
  516. // TODO: K&R warning: "traditional C rejects string constant concatenation"
  517. // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
  518. // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
  519. wchar_tByteWidth = ~0U;
  520. if (AnyWide) {
  521. wchar_tByteWidth = Target.getWCharWidth(StringToks[0].getLocation());
  522. assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
  523. wchar_tByteWidth /= 8;
  524. }
  525. // The output buffer size needs to be large enough to hold wide characters.
  526. // This is a worst-case assumption which basically corresponds to L"" "long".
  527. if (AnyWide)
  528. SizeBound *= wchar_tByteWidth;
  529. // Size the temporary buffer to hold the result string data.
  530. ResultBuf.resize(SizeBound);
  531. // Likewise, but for each string piece.
  532. llvm::SmallString<512> TokenBuf;
  533. TokenBuf.resize(MaxTokenLength);
  534. // Loop over all the strings, getting their spelling, and expanding them to
  535. // wide strings as appropriate.
  536. ResultPtr = &ResultBuf[0]; // Next byte to fill in.
  537. for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
  538. const char *ThisTokBuf = &TokenBuf[0];
  539. // Get the spelling of the token, which eliminates trigraphs, etc. We know
  540. // that ThisTokBuf points to a buffer that is big enough for the whole token
  541. // and 'spelled' tokens can only shrink.
  542. unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
  543. const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
  544. // TODO: Input character set mapping support.
  545. // Skip L marker for wide strings.
  546. bool ThisIsWide = false;
  547. if (ThisTokBuf[0] == 'L') {
  548. ++ThisTokBuf;
  549. ThisIsWide = true;
  550. }
  551. assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
  552. ++ThisTokBuf;
  553. while (ThisTokBuf != ThisTokEnd) {
  554. // Is this a span of non-escape characters?
  555. if (ThisTokBuf[0] != '\\') {
  556. const char *InStart = ThisTokBuf;
  557. do {
  558. ++ThisTokBuf;
  559. } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
  560. // Copy the character span over.
  561. unsigned Len = ThisTokBuf-InStart;
  562. if (!AnyWide) {
  563. memcpy(ResultPtr, InStart, Len);
  564. ResultPtr += Len;
  565. } else {
  566. // Note: our internal rep of wide char tokens is always little-endian.
  567. for (; Len; --Len, ++InStart) {
  568. *ResultPtr++ = InStart[0];
  569. // Add zeros at the end.
  570. for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
  571. *ResultPtr++ = 0;
  572. }
  573. }
  574. continue;
  575. }
  576. // Otherwise, this is an escape character. Process it.
  577. unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
  578. StringToks[i].getLocation(),
  579. ThisIsWide, PP);
  580. // Note: our internal rep of wide char tokens is always little-endian.
  581. *ResultPtr++ = ResultChar & 0xFF;
  582. if (AnyWide) {
  583. for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
  584. *ResultPtr++ = ResultChar >> i*8;
  585. }
  586. }
  587. }
  588. // Add zero terminator.
  589. *ResultPtr = 0;
  590. if (AnyWide) {
  591. for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
  592. *ResultPtr++ = 0;
  593. }
  594. }