PrintfFormatString.cpp 19 KB


  1. //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // Handling of format strings in printf and friends. The structure of format
  11. // strings for fprintf() is described in C99 7.19.6.1.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "clang/Analysis/Analyses/FormatString.h"
  15. #include "FormatStringParsing.h"
  16. using clang::analyze_format_string::ArgTypeResult;
  17. using clang::analyze_format_string::FormatStringHandler;
  18. using clang::analyze_format_string::LengthModifier;
  19. using clang::analyze_format_string::OptionalAmount;
  20. using clang::analyze_printf::ConversionSpecifier;
  21. using clang::analyze_printf::PrintfSpecifier;
  22. using namespace clang;
  23. typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
  24. PrintfSpecifierResult;
  25. //===----------------------------------------------------------------------===//
  26. // Methods for parsing format strings.
  27. //===----------------------------------------------------------------------===//
  28. using analyze_format_string::ParseNonPositionAmount;
  29. static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
  30. const char *Start, const char *&Beg, const char *E,
  31. unsigned *argIndex) {
  32. if (argIndex) {
  33. FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
  34. }
  35. else {
  36. const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
  37. analyze_format_string::PrecisionPos);
  38. if (Amt.isInvalid())
  39. return true;
  40. FS.setPrecision(Amt);
  41. }
  42. return false;
  43. }
  44. static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
  45. const char *&Beg,
  46. const char *E,
  47. unsigned &argIndex) {
  48. using namespace clang::analyze_printf;
  49. const char *I = Beg;
  50. const char *Start = 0;
  51. UpdateOnReturn <const char*> UpdateBeg(Beg, I);
  52. // Look for a '%' character that indicates the start of a format specifier.
  53. for ( ; I != E ; ++I) {
  54. char c = *I;
  55. if (c == '\0') {
  56. // Detect spurious null characters, which are likely errors.
  57. H.HandleNullChar(I);
  58. return true;
  59. }
  60. if (c == '%') {
  61. Start = I++; // Record the start of the format specifier.
  62. break;
  63. }
  64. }
  65. // No format specifier found?
  66. if (!Start)
  67. return false;
  68. if (I == E) {
  69. // No more characters left?
  70. H.HandleIncompleteSpecifier(Start, E - Start);
  71. return true;
  72. }
  73. PrintfSpecifier FS;
  74. if (ParseArgPosition(H, FS, Start, I, E))
  75. return true;
  76. if (I == E) {
  77. // No more characters left?
  78. H.HandleIncompleteSpecifier(Start, E - Start);
  79. return true;
  80. }
  81. // Look for flags (if any).
  82. bool hasMore = true;
  83. for ( ; I != E; ++I) {
  84. switch (*I) {
  85. default: hasMore = false; break;
  86. case '-': FS.setIsLeftJustified(I); break;
  87. case '+': FS.setHasPlusPrefix(I); break;
  88. case ' ': FS.setHasSpacePrefix(I); break;
  89. case '#': FS.setHasAlternativeForm(I); break;
  90. case '0': FS.setHasLeadingZeros(I); break;
  91. }
  92. if (!hasMore)
  93. break;
  94. }
  95. if (I == E) {
  96. // No more characters left?
  97. H.HandleIncompleteSpecifier(Start, E - Start);
  98. return true;
  99. }
  100. // Look for the field width (if any).
  101. if (ParseFieldWidth(H, FS, Start, I, E,
  102. FS.usesPositionalArg() ? 0 : &argIndex))
  103. return true;
  104. if (I == E) {
  105. // No more characters left?
  106. H.HandleIncompleteSpecifier(Start, E - Start);
  107. return true;
  108. }
  109. // Look for the precision (if any).
  110. if (*I == '.') {
  111. ++I;
  112. if (I == E) {
  113. H.HandleIncompleteSpecifier(Start, E - Start);
  114. return true;
  115. }
  116. if (ParsePrecision(H, FS, Start, I, E,
  117. FS.usesPositionalArg() ? 0 : &argIndex))
  118. return true;
  119. if (I == E) {
  120. // No more characters left?
  121. H.HandleIncompleteSpecifier(Start, E - Start);
  122. return true;
  123. }
  124. }
  125. // Look for the length modifier.
  126. if (ParseLengthModifier(FS, I, E) && I == E) {
  127. // No more characters left?
  128. H.HandleIncompleteSpecifier(Start, E - Start);
  129. return true;
  130. }
  131. if (*I == '\0') {
  132. // Detect spurious null characters, which are likely errors.
  133. H.HandleNullChar(I);
  134. return true;
  135. }
  136. // Finally, look for the conversion specifier.
  137. const char *conversionPosition = I++;
  138. ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
  139. switch (*conversionPosition) {
  140. default:
  141. break;
  142. // C99: 7.19.6.1 (section 8).
  143. case '%': k = ConversionSpecifier::PercentArg; break;
  144. case 'A': k = ConversionSpecifier::AArg; break;
  145. case 'E': k = ConversionSpecifier::EArg; break;
  146. case 'F': k = ConversionSpecifier::FArg; break;
  147. case 'G': k = ConversionSpecifier::GArg; break;
  148. case 'X': k = ConversionSpecifier::XArg; break;
  149. case 'a': k = ConversionSpecifier::aArg; break;
  150. case 'c': k = ConversionSpecifier::cArg; break;
  151. case 'd': k = ConversionSpecifier::dArg; break;
  152. case 'e': k = ConversionSpecifier::eArg; break;
  153. case 'f': k = ConversionSpecifier::fArg; break;
  154. case 'g': k = ConversionSpecifier::gArg; break;
  155. case 'i': k = ConversionSpecifier::iArg; break;
  156. case 'n': k = ConversionSpecifier::nArg; break;
  157. case 'o': k = ConversionSpecifier::oArg; break;
  158. case 'p': k = ConversionSpecifier::pArg; break;
  159. case 's': k = ConversionSpecifier::sArg; break;
  160. case 'u': k = ConversionSpecifier::uArg; break;
  161. case 'x': k = ConversionSpecifier::xArg; break;
  162. // Mac OS X (unicode) specific
  163. case 'C': k = ConversionSpecifier::CArg; break;
  164. case 'S': k = ConversionSpecifier::UnicodeStrArg; break;
  165. // Objective-C.
  166. case '@': k = ConversionSpecifier::ObjCObjArg; break;
  167. // Glibc specific.
  168. case 'm': k = ConversionSpecifier::PrintErrno; break;
  169. }
  170. ConversionSpecifier CS(conversionPosition, k);
  171. FS.setConversionSpecifier(CS);
  172. if (CS.consumesDataArgument() && !FS.usesPositionalArg())
  173. FS.setArgIndex(argIndex++);
  174. if (k == ConversionSpecifier::InvalidSpecifier) {
  175. // Assume the conversion takes one argument.
  176. return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
  177. }
  178. return PrintfSpecifierResult(Start, FS);
  179. }
  180. bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
  181. const char *I,
  182. const char *E) {
  183. unsigned argIndex = 0;
  184. // Keep looking for a format specifier until we have exhausted the string.
  185. while (I != E) {
  186. const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex);
  187. // Did a fail-stop error of any kind occur when parsing the specifier?
  188. // If so, don't do any more processing.
  189. if (FSR.shouldStop())
  190. return true;;
  191. // Did we exhaust the string or encounter an error that
  192. // we can recover from?
  193. if (!FSR.hasValue())
  194. continue;
  195. // We have a format specifier. Pass it to the callback.
  196. if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
  197. I - FSR.getStart()))
  198. return true;
  199. }
  200. assert(I == E && "Format string not exhausted");
  201. return false;
  202. }
  203. //===----------------------------------------------------------------------===//
  204. // Methods on ConversionSpecifier.
  205. //===----------------------------------------------------------------------===//
  206. const char *ConversionSpecifier::toString() const {
  207. switch (kind) {
  208. case dArg: return "d";
  209. case iArg: return "i";
  210. case oArg: return "o";
  211. case uArg: return "u";
  212. case xArg: return "x";
  213. case XArg: return "X";
  214. case fArg: return "f";
  215. case FArg: return "F";
  216. case eArg: return "e";
  217. case EArg: return "E";
  218. case gArg: return "g";
  219. case GArg: return "G";
  220. case aArg: return "a";
  221. case AArg: return "A";
  222. case cArg: return "c";
  223. case sArg: return "s";
  224. case pArg: return "p";
  225. case nArg: return "n";
  226. case PercentArg: return "%";
  227. case InvalidSpecifier: return NULL;
  228. // MacOS X unicode extensions.
  229. case CArg: return "C";
  230. case UnicodeStrArg: return "S";
  231. // Objective-C specific specifiers.
  232. case ObjCObjArg: return "@";
  233. // GlibC specific specifiers.
  234. case PrintErrno: return "m";
  235. }
  236. return NULL;
  237. }
  238. //===----------------------------------------------------------------------===//
  239. // Methods on PrintfSpecifier.
  240. //===----------------------------------------------------------------------===//
  241. ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
  242. if (!CS.consumesDataArgument())
  243. return ArgTypeResult::Invalid();
  244. if (CS.isIntArg())
  245. switch (LM.getKind()) {
  246. case LengthModifier::AsLongDouble:
  247. return ArgTypeResult::Invalid();
  248. case LengthModifier::None: return Ctx.IntTy;
  249. case LengthModifier::AsChar: return Ctx.SignedCharTy;
  250. case LengthModifier::AsShort: return Ctx.ShortTy;
  251. case LengthModifier::AsLong: return Ctx.LongTy;
  252. case LengthModifier::AsLongLong: return Ctx.LongLongTy;
  253. case LengthModifier::AsIntMax:
  254. // FIXME: Return unknown for now.
  255. return ArgTypeResult();
  256. case LengthModifier::AsSizeT: return Ctx.getSizeType();
  257. case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
  258. }
  259. if (CS.isUIntArg())
  260. switch (LM.getKind()) {
  261. case LengthModifier::AsLongDouble:
  262. return ArgTypeResult::Invalid();
  263. case LengthModifier::None: return Ctx.UnsignedIntTy;
  264. case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
  265. case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
  266. case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
  267. case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
  268. case LengthModifier::AsIntMax:
  269. // FIXME: Return unknown for now.
  270. return ArgTypeResult();
  271. case LengthModifier::AsSizeT:
  272. // FIXME: How to get the corresponding unsigned
  273. // version of size_t?
  274. return ArgTypeResult();
  275. case LengthModifier::AsPtrDiff:
  276. // FIXME: How to get the corresponding unsigned
  277. // version of ptrdiff_t?
  278. return ArgTypeResult();
  279. }
  280. if (CS.isDoubleArg()) {
  281. if (LM.getKind() == LengthModifier::AsLongDouble)
  282. return Ctx.LongDoubleTy;
  283. return Ctx.DoubleTy;
  284. }
  285. switch (CS.getKind()) {
  286. case ConversionSpecifier::sArg:
  287. return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
  288. ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
  289. case ConversionSpecifier::UnicodeStrArg:
  290. // FIXME: This appears to be Mac OS X specific.
  291. return ArgTypeResult::WCStrTy;
  292. case ConversionSpecifier::CArg:
  293. return Ctx.WCharTy;
  294. case ConversionSpecifier::pArg:
  295. return ArgTypeResult::CPointerTy;
  296. default:
  297. break;
  298. }
  299. // FIXME: Handle other cases.
  300. return ArgTypeResult();
  301. }
  302. bool PrintfSpecifier::fixType(QualType QT) {
  303. // Handle strings first (char *, wchar_t *)
  304. if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
  305. CS.setKind(ConversionSpecifier::sArg);
  306. // Disable irrelevant flags
  307. HasAlternativeForm = 0;
  308. HasLeadingZeroes = 0;
  309. // Set the long length modifier for wide characters
  310. if (QT->getPointeeType()->isWideCharType())
  311. LM.setKind(LengthModifier::AsWideChar);
  312. return true;
  313. }
  314. // We can only work with builtin types.
  315. if (!QT->isBuiltinType())
  316. return false;
  317. // Everything else should be a base type
  318. const BuiltinType *BT = QT->getAs<BuiltinType>();
  319. // Set length modifier
  320. switch (BT->getKind()) {
  321. default:
  322. // The rest of the conversions are either optional or for non-builtin types
  323. LM.setKind(LengthModifier::None);
  324. break;
  325. case BuiltinType::WChar:
  326. case BuiltinType::Long:
  327. case BuiltinType::ULong:
  328. LM.setKind(LengthModifier::AsLong);
  329. break;
  330. case BuiltinType::LongLong:
  331. case BuiltinType::ULongLong:
  332. LM.setKind(LengthModifier::AsLongLong);
  333. break;
  334. case BuiltinType::LongDouble:
  335. LM.setKind(LengthModifier::AsLongDouble);
  336. break;
  337. }
  338. // Set conversion specifier and disable any flags which do not apply to it.
  339. if (QT->isAnyCharacterType()) {
  340. CS.setKind(ConversionSpecifier::cArg);
  341. Precision.setHowSpecified(OptionalAmount::NotSpecified);
  342. HasAlternativeForm = 0;
  343. HasLeadingZeroes = 0;
  344. HasPlusPrefix = 0;
  345. }
  346. // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
  347. else if (QT->isRealFloatingType()) {
  348. CS.setKind(ConversionSpecifier::fArg);
  349. }
  350. else if (QT->isPointerType()) {
  351. CS.setKind(ConversionSpecifier::pArg);
  352. Precision.setHowSpecified(OptionalAmount::NotSpecified);
  353. HasAlternativeForm = 0;
  354. HasLeadingZeroes = 0;
  355. HasPlusPrefix = 0;
  356. }
  357. else if (QT->isSignedIntegerType()) {
  358. CS.setKind(ConversionSpecifier::dArg);
  359. HasAlternativeForm = 0;
  360. }
  361. else if (QT->isUnsignedIntegerType()) {
  362. CS.setKind(ConversionSpecifier::uArg);
  363. HasAlternativeForm = 0;
  364. HasPlusPrefix = 0;
  365. }
  366. else {
  367. return false;
  368. }
  369. return true;
  370. }
  371. void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
  372. // Whilst some features have no defined order, we are using the order
  373. // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1)
  374. os << "%";
  375. // Positional args
  376. if (usesPositionalArg()) {
  377. os << getPositionalArgIndex() << "$";
  378. }
  379. // Conversion flags
  380. if (IsLeftJustified) os << "-";
  381. if (HasPlusPrefix) os << "+";
  382. if (HasSpacePrefix) os << " ";
  383. if (HasAlternativeForm) os << "#";
  384. if (HasLeadingZeroes) os << "0";
  385. // Minimum field width
  386. FieldWidth.toString(os);
  387. // Precision
  388. Precision.toString(os);
  389. // Length modifier
  390. os << LM.toString();
  391. // Conversion specifier
  392. os << CS.toString();
  393. }
  394. bool PrintfSpecifier::hasValidPlusPrefix() const {
  395. if (!HasPlusPrefix)
  396. return true;
  397. // The plus prefix only makes sense for signed conversions
  398. switch (CS.getKind()) {
  399. case ConversionSpecifier::dArg:
  400. case ConversionSpecifier::iArg:
  401. case ConversionSpecifier::fArg:
  402. case ConversionSpecifier::FArg:
  403. case ConversionSpecifier::eArg:
  404. case ConversionSpecifier::EArg:
  405. case ConversionSpecifier::gArg:
  406. case ConversionSpecifier::GArg:
  407. case ConversionSpecifier::aArg:
  408. case ConversionSpecifier::AArg:
  409. return true;
  410. default:
  411. return false;
  412. }
  413. }
  414. bool PrintfSpecifier::hasValidAlternativeForm() const {
  415. if (!HasAlternativeForm)
  416. return true;
  417. // Alternate form flag only valid with the oxaAeEfFgG conversions
  418. switch (CS.getKind()) {
  419. case ConversionSpecifier::oArg:
  420. case ConversionSpecifier::xArg:
  421. case ConversionSpecifier::aArg:
  422. case ConversionSpecifier::AArg:
  423. case ConversionSpecifier::eArg:
  424. case ConversionSpecifier::EArg:
  425. case ConversionSpecifier::fArg:
  426. case ConversionSpecifier::FArg:
  427. case ConversionSpecifier::gArg:
  428. case ConversionSpecifier::GArg:
  429. return true;
  430. default:
  431. return false;
  432. }
  433. }
  434. bool PrintfSpecifier::hasValidLeadingZeros() const {
  435. if (!HasLeadingZeroes)
  436. return true;
  437. // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
  438. switch (CS.getKind()) {
  439. case ConversionSpecifier::dArg:
  440. case ConversionSpecifier::iArg:
  441. case ConversionSpecifier::oArg:
  442. case ConversionSpecifier::uArg:
  443. case ConversionSpecifier::xArg:
  444. case ConversionSpecifier::XArg:
  445. case ConversionSpecifier::aArg:
  446. case ConversionSpecifier::AArg:
  447. case ConversionSpecifier::eArg:
  448. case ConversionSpecifier::EArg:
  449. case ConversionSpecifier::fArg:
  450. case ConversionSpecifier::FArg:
  451. case ConversionSpecifier::gArg:
  452. case ConversionSpecifier::GArg:
  453. return true;
  454. default:
  455. return false;
  456. }
  457. }
  458. bool PrintfSpecifier::hasValidSpacePrefix() const {
  459. if (!HasSpacePrefix)
  460. return true;
  461. // The space prefix only makes sense for signed conversions
  462. switch (CS.getKind()) {
  463. case ConversionSpecifier::dArg:
  464. case ConversionSpecifier::iArg:
  465. case ConversionSpecifier::fArg:
  466. case ConversionSpecifier::FArg:
  467. case ConversionSpecifier::eArg:
  468. case ConversionSpecifier::EArg:
  469. case ConversionSpecifier::gArg:
  470. case ConversionSpecifier::GArg:
  471. case ConversionSpecifier::aArg:
  472. case ConversionSpecifier::AArg:
  473. return true;
  474. default:
  475. return false;
  476. }
  477. }
  478. bool PrintfSpecifier::hasValidLeftJustified() const {
  479. if (!IsLeftJustified)
  480. return true;
  481. // The left justified flag is valid for all conversions except n
  482. switch (CS.getKind()) {
  483. case ConversionSpecifier::nArg:
  484. return false;
  485. default:
  486. return true;
  487. }
  488. }
  489. bool PrintfSpecifier::hasValidLengthModifier() const {
  490. switch (LM.getKind()) {
  491. case LengthModifier::None:
  492. return true;
  493. // Handle most integer flags
  494. case LengthModifier::AsChar:
  495. case LengthModifier::AsShort:
  496. case LengthModifier::AsLongLong:
  497. case LengthModifier::AsIntMax:
  498. case LengthModifier::AsSizeT:
  499. case LengthModifier::AsPtrDiff:
  500. switch (CS.getKind()) {
  501. case ConversionSpecifier::dArg:
  502. case ConversionSpecifier::iArg:
  503. case ConversionSpecifier::oArg:
  504. case ConversionSpecifier::uArg:
  505. case ConversionSpecifier::xArg:
  506. case ConversionSpecifier::XArg:
  507. case ConversionSpecifier::nArg:
  508. return true;
  509. default:
  510. return false;
  511. }
  512. // Handle 'l' flag
  513. case LengthModifier::AsLong:
  514. switch (CS.getKind()) {
  515. case ConversionSpecifier::dArg:
  516. case ConversionSpecifier::iArg:
  517. case ConversionSpecifier::oArg:
  518. case ConversionSpecifier::uArg:
  519. case ConversionSpecifier::xArg:
  520. case ConversionSpecifier::XArg:
  521. case ConversionSpecifier::aArg:
  522. case ConversionSpecifier::AArg:
  523. case ConversionSpecifier::fArg:
  524. case ConversionSpecifier::FArg:
  525. case ConversionSpecifier::eArg:
  526. case ConversionSpecifier::EArg:
  527. case ConversionSpecifier::gArg:
  528. case ConversionSpecifier::GArg:
  529. case ConversionSpecifier::nArg:
  530. case ConversionSpecifier::cArg:
  531. case ConversionSpecifier::sArg:
  532. return true;
  533. default:
  534. return false;
  535. }
  536. case LengthModifier::AsLongDouble:
  537. switch (CS.getKind()) {
  538. case ConversionSpecifier::aArg:
  539. case ConversionSpecifier::AArg:
  540. case ConversionSpecifier::fArg:
  541. case ConversionSpecifier::FArg:
  542. case ConversionSpecifier::eArg:
  543. case ConversionSpecifier::EArg:
  544. case ConversionSpecifier::gArg:
  545. case ConversionSpecifier::GArg:
  546. return true;
  547. default:
  548. return false;
  549. }
  550. }
  551. return false;
  552. }
  553. bool PrintfSpecifier::hasValidPrecision() const {
  554. if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
  555. return true;
  556. // Precision is only valid with the diouxXaAeEfFgGs conversions
  557. switch (CS.getKind()) {
  558. case ConversionSpecifier::dArg:
  559. case ConversionSpecifier::iArg:
  560. case ConversionSpecifier::oArg:
  561. case ConversionSpecifier::uArg:
  562. case ConversionSpecifier::xArg:
  563. case ConversionSpecifier::XArg:
  564. case ConversionSpecifier::aArg:
  565. case ConversionSpecifier::AArg:
  566. case ConversionSpecifier::eArg:
  567. case ConversionSpecifier::EArg:
  568. case ConversionSpecifier::fArg:
  569. case ConversionSpecifier::FArg:
  570. case ConversionSpecifier::gArg:
  571. case ConversionSpecifier::GArg:
  572. case ConversionSpecifier::sArg:
  573. return true;
  574. default:
  575. return false;
  576. }
  577. }
  578. bool PrintfSpecifier::hasValidFieldWidth() const {
  579. if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
  580. return true;
  581. // The field width is valid for all conversions except n
  582. switch (CS.getKind()) {
  583. case ConversionSpecifier::nArg:
  584. return false;
  585. default:
  586. return true;
  587. }
  588. }