ScanfFormatString.cpp 15 KB


  1. //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // Handling of format strings in scanf and friends. The structure of format
  11. // strings for fscanf() is described in C99 7.19.6.2.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "clang/Analysis/Analyses/FormatString.h"
  15. #include "FormatStringParsing.h"
  16. using clang::analyze_format_string::ArgTypeResult;
  17. using clang::analyze_format_string::FormatStringHandler;
  18. using clang::analyze_format_string::LengthModifier;
  19. using clang::analyze_format_string::OptionalAmount;
  20. using clang::analyze_format_string::ConversionSpecifier;
  21. using clang::analyze_scanf::ScanfArgTypeResult;
  22. using clang::analyze_scanf::ScanfConversionSpecifier;
  23. using clang::analyze_scanf::ScanfSpecifier;
  24. using clang::UpdateOnReturn;
  25. using namespace clang;
  26. typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
  27. ScanfSpecifierResult;
  28. static bool ParseScanList(FormatStringHandler &H,
  29. ScanfConversionSpecifier &CS,
  30. const char *&Beg, const char *E) {
  31. const char *I = Beg;
  32. const char *start = I - 1;
  33. UpdateOnReturn <const char*> UpdateBeg(Beg, I);
  34. // No more characters?
  35. if (I == E) {
  36. H.HandleIncompleteScanList(start, I);
  37. return true;
  38. }
  39. // Special case: ']' is the first character.
  40. if (*I == ']') {
  41. if (++I == E) {
  42. H.HandleIncompleteScanList(start, I - 1);
  43. return true;
  44. }
  45. }
  46. // Look for a ']' character which denotes the end of the scan list.
  47. while (*I != ']') {
  48. if (++I == E) {
  49. H.HandleIncompleteScanList(start, I - 1);
  50. return true;
  51. }
  52. }
  53. CS.setEndScanList(I);
  54. return false;
  55. }
  56. // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
  57. // We can possibly refactor.
  58. static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
  59. const char *&Beg,
  60. const char *E,
  61. unsigned &argIndex,
  62. const LangOptions &LO) {
  63. using namespace clang::analyze_scanf;
  64. const char *I = Beg;
  65. const char *Start = 0;
  66. UpdateOnReturn <const char*> UpdateBeg(Beg, I);
  67. // Look for a '%' character that indicates the start of a format specifier.
  68. for ( ; I != E ; ++I) {
  69. char c = *I;
  70. if (c == '\0') {
  71. // Detect spurious null characters, which are likely errors.
  72. H.HandleNullChar(I);
  73. return true;
  74. }
  75. if (c == '%') {
  76. Start = I++; // Record the start of the format specifier.
  77. break;
  78. }
  79. }
  80. // No format specifier found?
  81. if (!Start)
  82. return false;
  83. if (I == E) {
  84. // No more characters left?
  85. H.HandleIncompleteSpecifier(Start, E - Start);
  86. return true;
  87. }
  88. ScanfSpecifier FS;
  89. if (ParseArgPosition(H, FS, Start, I, E))
  90. return true;
  91. if (I == E) {
  92. // No more characters left?
  93. H.HandleIncompleteSpecifier(Start, E - Start);
  94. return true;
  95. }
  96. // Look for '*' flag if it is present.
  97. if (*I == '*') {
  98. FS.setSuppressAssignment(I);
  99. if (++I == E) {
  100. H.HandleIncompleteSpecifier(Start, E - Start);
  101. return true;
  102. }
  103. }
  104. // Look for the field width (if any). Unlike printf, this is either
  105. // a fixed integer or isn't present.
  106. const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
  107. if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
  108. assert(Amt.getHowSpecified() == OptionalAmount::Constant);
  109. FS.setFieldWidth(Amt);
  110. if (I == E) {
  111. // No more characters left?
  112. H.HandleIncompleteSpecifier(Start, E - Start);
  113. return true;
  114. }
  115. }
  116. // Look for the length modifier.
  117. if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
  118. // No more characters left?
  119. H.HandleIncompleteSpecifier(Start, E - Start);
  120. return true;
  121. }
  122. // Detect spurious null characters, which are likely errors.
  123. if (*I == '\0') {
  124. H.HandleNullChar(I);
  125. return true;
  126. }
  127. // Finally, look for the conversion specifier.
  128. const char *conversionPosition = I++;
  129. ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
  130. switch (*conversionPosition) {
  131. default:
  132. break;
  133. case '%': k = ConversionSpecifier::PercentArg; break;
  134. case 'A': k = ConversionSpecifier::AArg; break;
  135. case 'E': k = ConversionSpecifier::EArg; break;
  136. case 'F': k = ConversionSpecifier::FArg; break;
  137. case 'G': k = ConversionSpecifier::GArg; break;
  138. case 'X': k = ConversionSpecifier::XArg; break;
  139. case 'a': k = ConversionSpecifier::aArg; break;
  140. case 'd': k = ConversionSpecifier::dArg; break;
  141. case 'e': k = ConversionSpecifier::eArg; break;
  142. case 'f': k = ConversionSpecifier::fArg; break;
  143. case 'g': k = ConversionSpecifier::gArg; break;
  144. case 'i': k = ConversionSpecifier::iArg; break;
  145. case 'n': k = ConversionSpecifier::nArg; break;
  146. case 'c': k = ConversionSpecifier::cArg; break;
  147. case 'C': k = ConversionSpecifier::CArg; break;
  148. case 'S': k = ConversionSpecifier::SArg; break;
  149. case '[': k = ConversionSpecifier::ScanListArg; break;
  150. case 'u': k = ConversionSpecifier::uArg; break;
  151. case 'x': k = ConversionSpecifier::xArg; break;
  152. case 'o': k = ConversionSpecifier::oArg; break;
  153. case 's': k = ConversionSpecifier::sArg; break;
  154. case 'p': k = ConversionSpecifier::pArg; break;
  155. }
  156. ScanfConversionSpecifier CS(conversionPosition, k);
  157. if (k == ScanfConversionSpecifier::ScanListArg) {
  158. if (ParseScanList(H, CS, I, E))
  159. return true;
  160. }
  161. FS.setConversionSpecifier(CS);
  162. if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
  163. && !FS.usesPositionalArg())
  164. FS.setArgIndex(argIndex++);
  165. // FIXME: '%' and '*' doesn't make sense. Issue a warning.
  166. // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
  167. if (k == ScanfConversionSpecifier::InvalidSpecifier) {
  168. // Assume the conversion takes one argument.
  169. return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
  170. }
  171. return ScanfSpecifierResult(Start, FS);
  172. }
  173. ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
  174. const ScanfConversionSpecifier &CS = getConversionSpecifier();
  175. if (!CS.consumesDataArgument())
  176. return ScanfArgTypeResult::Invalid();
  177. switch(CS.getKind()) {
  178. // Signed int.
  179. case ConversionSpecifier::dArg:
  180. case ConversionSpecifier::iArg:
  181. switch (LM.getKind()) {
  182. case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
  183. case LengthModifier::AsChar:
  184. return ArgTypeResult(ArgTypeResult::AnyCharTy);
  185. case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
  186. case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
  187. case LengthModifier::AsLongLong: return ArgTypeResult(Ctx.LongLongTy);
  188. case LengthModifier::AsIntMax:
  189. return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
  190. case LengthModifier::AsSizeT:
  191. // FIXME: ssize_t.
  192. return ScanfArgTypeResult();
  193. case LengthModifier::AsPtrDiff:
  194. return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
  195. case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid();
  196. case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
  197. }
  198. // Unsigned int.
  199. case ConversionSpecifier::oArg:
  200. case ConversionSpecifier::uArg:
  201. case ConversionSpecifier::xArg:
  202. case ConversionSpecifier::XArg:
  203. switch (LM.getKind()) {
  204. case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
  205. case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
  206. case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
  207. case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
  208. case LengthModifier::AsLongLong:
  209. return ArgTypeResult(Ctx.UnsignedLongLongTy);
  210. case LengthModifier::AsIntMax:
  211. return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
  212. case LengthModifier::AsSizeT:
  213. return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
  214. case LengthModifier::AsPtrDiff:
  215. // FIXME: Unsigned version of ptrdiff_t?
  216. return ScanfArgTypeResult();
  217. case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid();
  218. case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
  219. }
  220. // Float.
  221. case ConversionSpecifier::aArg:
  222. case ConversionSpecifier::AArg:
  223. case ConversionSpecifier::eArg:
  224. case ConversionSpecifier::EArg:
  225. case ConversionSpecifier::fArg:
  226. case ConversionSpecifier::FArg:
  227. case ConversionSpecifier::gArg:
  228. case ConversionSpecifier::GArg:
  229. switch (LM.getKind()) {
  230. case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
  231. case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
  232. case LengthModifier::AsLongDouble:
  233. return ArgTypeResult(Ctx.LongDoubleTy);
  234. default:
  235. return ScanfArgTypeResult::Invalid();
  236. }
  237. // Char, string and scanlist.
  238. case ConversionSpecifier::cArg:
  239. case ConversionSpecifier::sArg:
  240. case ConversionSpecifier::ScanListArg:
  241. switch (LM.getKind()) {
  242. case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
  243. case LengthModifier::AsLong:
  244. return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
  245. default:
  246. return ScanfArgTypeResult::Invalid();
  247. }
  248. case ConversionSpecifier::CArg:
  249. case ConversionSpecifier::SArg:
  250. // FIXME: Mac OS X specific?
  251. if (LM.getKind() == LengthModifier::None)
  252. return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
  253. return ScanfArgTypeResult::Invalid();
  254. // Pointer.
  255. case ConversionSpecifier::pArg:
  256. return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
  257. default:
  258. break;
  259. }
  260. return ScanfArgTypeResult();
  261. }
  262. bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt)
  263. {
  264. if (!QT->isPointerType())
  265. return false;
  266. QualType PT = QT->getPointeeType();
  267. const BuiltinType *BT = PT->getAs<BuiltinType>();
  268. if (!BT)
  269. return false;
  270. // Pointer to a character.
  271. if (PT->isAnyCharacterType()) {
  272. CS.setKind(ConversionSpecifier::sArg);
  273. if (PT->isWideCharType())
  274. LM.setKind(LengthModifier::AsWideChar);
  275. else
  276. LM.setKind(LengthModifier::None);
  277. return true;
  278. }
  279. // Figure out the length modifier.
  280. switch (BT->getKind()) {
  281. // no modifier
  282. case BuiltinType::UInt:
  283. case BuiltinType::Int:
  284. case BuiltinType::Float:
  285. LM.setKind(LengthModifier::None);
  286. break;
  287. // hh
  288. case BuiltinType::Char_U:
  289. case BuiltinType::UChar:
  290. case BuiltinType::Char_S:
  291. case BuiltinType::SChar:
  292. LM.setKind(LengthModifier::AsChar);
  293. break;
  294. // h
  295. case BuiltinType::Short:
  296. case BuiltinType::UShort:
  297. LM.setKind(LengthModifier::AsShort);
  298. break;
  299. // l
  300. case BuiltinType::Long:
  301. case BuiltinType::ULong:
  302. case BuiltinType::Double:
  303. LM.setKind(LengthModifier::AsLong);
  304. break;
  305. // ll
  306. case BuiltinType::LongLong:
  307. case BuiltinType::ULongLong:
  308. LM.setKind(LengthModifier::AsLongLong);
  309. break;
  310. // L
  311. case BuiltinType::LongDouble:
  312. LM.setKind(LengthModifier::AsLongDouble);
  313. break;
  314. // Don't know.
  315. default:
  316. return false;
  317. }
  318. // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
  319. if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
  320. const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
  321. if (Identifier->getName() == "size_t") {
  322. LM.setKind(LengthModifier::AsSizeT);
  323. } else if (Identifier->getName() == "ssize_t") {
  324. // Not C99, but common in Unix.
  325. LM.setKind(LengthModifier::AsSizeT);
  326. } else if (Identifier->getName() == "intmax_t") {
  327. LM.setKind(LengthModifier::AsIntMax);
  328. } else if (Identifier->getName() == "uintmax_t") {
  329. LM.setKind(LengthModifier::AsIntMax);
  330. } else if (Identifier->getName() == "ptrdiff_t") {
  331. LM.setKind(LengthModifier::AsPtrDiff);
  332. }
  333. }
  334. // Figure out the conversion specifier.
  335. if (PT->isRealFloatingType())
  336. CS.setKind(ConversionSpecifier::fArg);
  337. else if (PT->isSignedIntegerType())
  338. CS.setKind(ConversionSpecifier::dArg);
  339. else if (PT->isUnsignedIntegerType()) {
  340. // Preserve the original formatting, e.g. 'X', 'o'.
  341. if (!CS.isUIntArg()) {
  342. CS.setKind(ConversionSpecifier::uArg);
  343. }
  344. } else
  345. llvm_unreachable("Unexpected type");
  346. return true;
  347. }
  348. void ScanfSpecifier::toString(raw_ostream &os) const {
  349. os << "%";
  350. if (usesPositionalArg())
  351. os << getPositionalArgIndex() << "$";
  352. if (SuppressAssignment)
  353. os << "*";
  354. FieldWidth.toString(os);
  355. os << LM.toString();
  356. os << CS.toString();
  357. }
  358. bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
  359. const char *I,
  360. const char *E,
  361. const LangOptions &LO) {
  362. unsigned argIndex = 0;
  363. // Keep looking for a format specifier until we have exhausted the string.
  364. while (I != E) {
  365. const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
  366. LO);
  367. // Did a fail-stop error of any kind occur when parsing the specifier?
  368. // If so, don't do any more processing.
  369. if (FSR.shouldStop())
  370. return true;;
  371. // Did we exhaust the string or encounter an error that
  372. // we can recover from?
  373. if (!FSR.hasValue())
  374. continue;
  375. // We have a format specifier. Pass it to the callback.
  376. if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
  377. I - FSR.getStart())) {
  378. return true;
  379. }
  380. }
  381. assert(I == E && "Format string not exhausted");
  382. return false;
  383. }
  384. bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
  385. switch (K) {
  386. case InvalidTy:
  387. llvm_unreachable("ArgTypeResult must be valid");
  388. case UnknownTy:
  389. return true;
  390. case CStrTy:
  391. return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
  392. case WCStrTy:
  393. return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
  394. case PtrToArgTypeResultTy: {
  395. const PointerType *PT = argTy->getAs<PointerType>();
  396. if (!PT)
  397. return false;
  398. return A.matchesType(C, PT->getPointeeType());
  399. }
  400. }
  401. return false; // Unreachable, but we still get a warning.
  402. }
  403. QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
  404. switch (K) {
  405. case InvalidTy:
  406. llvm_unreachable("No representative type for Invalid ArgTypeResult");
  407. case UnknownTy:
  408. return QualType();
  409. case CStrTy:
  410. return C.getPointerType(C.CharTy);
  411. case WCStrTy:
  412. return C.getPointerType(C.getWCharType());
  413. case PtrToArgTypeResultTy:
  414. return C.getPointerType(A.getRepresentativeType(C));
  415. }
  416. return QualType(); // Not reachable.
  417. }
  418. std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
  419. std::string S = getRepresentativeType(C).getAsString();
  420. if (!Name)
  421. return std::string("'") + S + "'";
  422. return std::string("'") + Name + "' (aka '" + S + "')";
  423. }