ScanfFormatString.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // Handling of format strings in scanf and friends. The structure of format
  11. // strings for fscanf() is described in C99 7.19.6.2.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "clang/Analysis/Analyses/FormatString.h"
  15. #include "FormatStringParsing.h"
  16. using clang::analyze_format_string::ArgTypeResult;
  17. using clang::analyze_format_string::FormatStringHandler;
  18. using clang::analyze_format_string::LengthModifier;
  19. using clang::analyze_format_string::OptionalAmount;
  20. using clang::analyze_format_string::ConversionSpecifier;
  21. using clang::analyze_scanf::ScanfArgTypeResult;
  22. using clang::analyze_scanf::ScanfConversionSpecifier;
  23. using clang::analyze_scanf::ScanfSpecifier;
  24. using clang::UpdateOnReturn;
  25. using namespace clang;
  26. typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
  27. ScanfSpecifierResult;
  28. static bool ParseScanList(FormatStringHandler &H,
  29. ScanfConversionSpecifier &CS,
  30. const char *&Beg, const char *E) {
  31. const char *I = Beg;
  32. const char *start = I - 1;
  33. UpdateOnReturn <const char*> UpdateBeg(Beg, I);
  34. // No more characters?
  35. if (I == E) {
  36. H.HandleIncompleteScanList(start, I);
  37. return true;
  38. }
  39. // Special case: ']' is the first character.
  40. if (*I == ']') {
  41. if (++I == E) {
  42. H.HandleIncompleteScanList(start, I - 1);
  43. return true;
  44. }
  45. }
  46. // Look for a ']' character which denotes the end of the scan list.
  47. while (*I != ']') {
  48. if (++I == E) {
  49. H.HandleIncompleteScanList(start, I - 1);
  50. return true;
  51. }
  52. }
  53. CS.setEndScanList(I);
  54. return false;
  55. }
  56. // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
  57. // We can possibly refactor.
  58. static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
  59. const char *&Beg,
  60. const char *E,
  61. unsigned &argIndex,
  62. const LangOptions &LO) {
  63. using namespace clang::analyze_scanf;
  64. const char *I = Beg;
  65. const char *Start = 0;
  66. UpdateOnReturn <const char*> UpdateBeg(Beg, I);
  67. // Look for a '%' character that indicates the start of a format specifier.
  68. for ( ; I != E ; ++I) {
  69. char c = *I;
  70. if (c == '\0') {
  71. // Detect spurious null characters, which are likely errors.
  72. H.HandleNullChar(I);
  73. return true;
  74. }
  75. if (c == '%') {
  76. Start = I++; // Record the start of the format specifier.
  77. break;
  78. }
  79. }
  80. // No format specifier found?
  81. if (!Start)
  82. return false;
  83. if (I == E) {
  84. // No more characters left?
  85. H.HandleIncompleteSpecifier(Start, E - Start);
  86. return true;
  87. }
  88. ScanfSpecifier FS;
  89. if (ParseArgPosition(H, FS, Start, I, E))
  90. return true;
  91. if (I == E) {
  92. // No more characters left?
  93. H.HandleIncompleteSpecifier(Start, E - Start);
  94. return true;
  95. }
  96. // Look for '*' flag if it is present.
  97. if (*I == '*') {
  98. FS.setSuppressAssignment(I);
  99. if (++I == E) {
  100. H.HandleIncompleteSpecifier(Start, E - Start);
  101. return true;
  102. }
  103. }
  104. // Look for the field width (if any). Unlike printf, this is either
  105. // a fixed integer or isn't present.
  106. const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
  107. if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
  108. assert(Amt.getHowSpecified() == OptionalAmount::Constant);
  109. FS.setFieldWidth(Amt);
  110. if (I == E) {
  111. // No more characters left?
  112. H.HandleIncompleteSpecifier(Start, E - Start);
  113. return true;
  114. }
  115. }
  116. // Look for the length modifier.
  117. if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
  118. // No more characters left?
  119. H.HandleIncompleteSpecifier(Start, E - Start);
  120. return true;
  121. }
  122. // Detect spurious null characters, which are likely errors.
  123. if (*I == '\0') {
  124. H.HandleNullChar(I);
  125. return true;
  126. }
  127. // Finally, look for the conversion specifier.
  128. const char *conversionPosition = I++;
  129. ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
  130. switch (*conversionPosition) {
  131. default:
  132. break;
  133. case '%': k = ConversionSpecifier::PercentArg; break;
  134. case 'A': k = ConversionSpecifier::AArg; break;
  135. case 'E': k = ConversionSpecifier::EArg; break;
  136. case 'F': k = ConversionSpecifier::FArg; break;
  137. case 'G': k = ConversionSpecifier::GArg; break;
  138. case 'X': k = ConversionSpecifier::XArg; break;
  139. case 'a': k = ConversionSpecifier::aArg; break;
  140. case 'd': k = ConversionSpecifier::dArg; break;
  141. case 'e': k = ConversionSpecifier::eArg; break;
  142. case 'f': k = ConversionSpecifier::fArg; break;
  143. case 'g': k = ConversionSpecifier::gArg; break;
  144. case 'i': k = ConversionSpecifier::iArg; break;
  145. case 'n': k = ConversionSpecifier::nArg; break;
  146. case 'c': k = ConversionSpecifier::cArg; break;
  147. case 'C': k = ConversionSpecifier::CArg; break;
  148. case 'S': k = ConversionSpecifier::SArg; break;
  149. case '[': k = ConversionSpecifier::ScanListArg; break;
  150. case 'u': k = ConversionSpecifier::uArg; break;
  151. case 'x': k = ConversionSpecifier::xArg; break;
  152. case 'o': k = ConversionSpecifier::oArg; break;
  153. case 's': k = ConversionSpecifier::sArg; break;
  154. case 'p': k = ConversionSpecifier::pArg; break;
  155. }
  156. ScanfConversionSpecifier CS(conversionPosition, k);
  157. if (k == ScanfConversionSpecifier::ScanListArg) {
  158. if (ParseScanList(H, CS, I, E))
  159. return true;
  160. }
  161. FS.setConversionSpecifier(CS);
  162. if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
  163. && !FS.usesPositionalArg())
  164. FS.setArgIndex(argIndex++);
  165. // FIXME: '%' and '*' doesn't make sense. Issue a warning.
  166. // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
  167. if (k == ScanfConversionSpecifier::InvalidSpecifier) {
  168. // Assume the conversion takes one argument.
  169. return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
  170. }
  171. return ScanfSpecifierResult(Start, FS);
  172. }
  173. ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
  174. const ScanfConversionSpecifier &CS = getConversionSpecifier();
  175. if (!CS.consumesDataArgument())
  176. return ScanfArgTypeResult::Invalid();
  177. switch(CS.getKind()) {
  178. // Signed int.
  179. case ConversionSpecifier::dArg:
  180. case ConversionSpecifier::iArg:
  181. switch (LM.getKind()) {
  182. case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
  183. case LengthModifier::AsChar:
  184. return ArgTypeResult(ArgTypeResult::AnyCharTy);
  185. case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
  186. case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
  187. case LengthModifier::AsLongLong:
  188. case LengthModifier::AsQuad:
  189. return ArgTypeResult(Ctx.LongLongTy);
  190. case LengthModifier::AsIntMax:
  191. return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
  192. case LengthModifier::AsSizeT:
  193. // FIXME: ssize_t.
  194. return ScanfArgTypeResult();
  195. case LengthModifier::AsPtrDiff:
  196. return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
  197. case LengthModifier::AsLongDouble:
  198. // GNU extension.
  199. return ArgTypeResult(Ctx.LongLongTy);
  200. case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
  201. case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
  202. }
  203. // Unsigned int.
  204. case ConversionSpecifier::oArg:
  205. case ConversionSpecifier::uArg:
  206. case ConversionSpecifier::xArg:
  207. case ConversionSpecifier::XArg:
  208. switch (LM.getKind()) {
  209. case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
  210. case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
  211. case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
  212. case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
  213. case LengthModifier::AsLongLong:
  214. case LengthModifier::AsQuad:
  215. return ArgTypeResult(Ctx.UnsignedLongLongTy);
  216. case LengthModifier::AsIntMax:
  217. return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
  218. case LengthModifier::AsSizeT:
  219. return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
  220. case LengthModifier::AsPtrDiff:
  221. // FIXME: Unsigned version of ptrdiff_t?
  222. return ScanfArgTypeResult();
  223. case LengthModifier::AsLongDouble:
  224. // GNU extension.
  225. return ArgTypeResult(Ctx.UnsignedLongLongTy);
  226. case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
  227. case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
  228. }
  229. // Float.
  230. case ConversionSpecifier::aArg:
  231. case ConversionSpecifier::AArg:
  232. case ConversionSpecifier::eArg:
  233. case ConversionSpecifier::EArg:
  234. case ConversionSpecifier::fArg:
  235. case ConversionSpecifier::FArg:
  236. case ConversionSpecifier::gArg:
  237. case ConversionSpecifier::GArg:
  238. switch (LM.getKind()) {
  239. case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
  240. case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
  241. case LengthModifier::AsLongDouble:
  242. return ArgTypeResult(Ctx.LongDoubleTy);
  243. default:
  244. return ScanfArgTypeResult::Invalid();
  245. }
  246. // Char, string and scanlist.
  247. case ConversionSpecifier::cArg:
  248. case ConversionSpecifier::sArg:
  249. case ConversionSpecifier::ScanListArg:
  250. switch (LM.getKind()) {
  251. case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
  252. case LengthModifier::AsLong:
  253. return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
  254. case LengthModifier::AsAllocate:
  255. case LengthModifier::AsMAllocate:
  256. return ScanfArgTypeResult(ArgTypeResult::CStrTy);
  257. default:
  258. return ScanfArgTypeResult::Invalid();
  259. }
  260. case ConversionSpecifier::CArg:
  261. case ConversionSpecifier::SArg:
  262. // FIXME: Mac OS X specific?
  263. switch (LM.getKind()) {
  264. case LengthModifier::None:
  265. return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
  266. case LengthModifier::AsAllocate:
  267. case LengthModifier::AsMAllocate:
  268. return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **");
  269. default:
  270. return ScanfArgTypeResult::Invalid();
  271. }
  272. // Pointer.
  273. case ConversionSpecifier::pArg:
  274. return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
  275. case ConversionSpecifier::nArg:
  276. return ArgTypeResult(Ctx.IntTy);
  277. default:
  278. break;
  279. }
  280. return ScanfArgTypeResult();
  281. }
  282. bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
  283. ASTContext &Ctx) {
  284. if (!QT->isPointerType())
  285. return false;
  286. // %n is different from other conversion specifiers; don't try to fix it.
  287. if (CS.getKind() == ConversionSpecifier::nArg)
  288. return false;
  289. QualType PT = QT->getPointeeType();
  290. // If it's an enum, get its underlying type.
  291. if (const EnumType *ETy = QT->getAs<EnumType>())
  292. QT = ETy->getDecl()->getIntegerType();
  293. const BuiltinType *BT = PT->getAs<BuiltinType>();
  294. if (!BT)
  295. return false;
  296. // Pointer to a character.
  297. if (PT->isAnyCharacterType()) {
  298. CS.setKind(ConversionSpecifier::sArg);
  299. if (PT->isWideCharType())
  300. LM.setKind(LengthModifier::AsWideChar);
  301. else
  302. LM.setKind(LengthModifier::None);
  303. return true;
  304. }
  305. // Figure out the length modifier.
  306. switch (BT->getKind()) {
  307. // no modifier
  308. case BuiltinType::UInt:
  309. case BuiltinType::Int:
  310. case BuiltinType::Float:
  311. LM.setKind(LengthModifier::None);
  312. break;
  313. // hh
  314. case BuiltinType::Char_U:
  315. case BuiltinType::UChar:
  316. case BuiltinType::Char_S:
  317. case BuiltinType::SChar:
  318. LM.setKind(LengthModifier::AsChar);
  319. break;
  320. // h
  321. case BuiltinType::Short:
  322. case BuiltinType::UShort:
  323. LM.setKind(LengthModifier::AsShort);
  324. break;
  325. // l
  326. case BuiltinType::Long:
  327. case BuiltinType::ULong:
  328. case BuiltinType::Double:
  329. LM.setKind(LengthModifier::AsLong);
  330. break;
  331. // ll
  332. case BuiltinType::LongLong:
  333. case BuiltinType::ULongLong:
  334. LM.setKind(LengthModifier::AsLongLong);
  335. break;
  336. // L
  337. case BuiltinType::LongDouble:
  338. LM.setKind(LengthModifier::AsLongDouble);
  339. break;
  340. // Don't know.
  341. default:
  342. return false;
  343. }
  344. // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
  345. if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x))
  346. namedTypeToLengthModifier(PT, LM);
  347. // If fixing the length modifier was enough, we are done.
  348. const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx);
  349. if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT))
  350. return true;
  351. // Figure out the conversion specifier.
  352. if (PT->isRealFloatingType())
  353. CS.setKind(ConversionSpecifier::fArg);
  354. else if (PT->isSignedIntegerType())
  355. CS.setKind(ConversionSpecifier::dArg);
  356. else if (PT->isUnsignedIntegerType())
  357. CS.setKind(ConversionSpecifier::uArg);
  358. else
  359. llvm_unreachable("Unexpected type");
  360. return true;
  361. }
  362. void ScanfSpecifier::toString(raw_ostream &os) const {
  363. os << "%";
  364. if (usesPositionalArg())
  365. os << getPositionalArgIndex() << "$";
  366. if (SuppressAssignment)
  367. os << "*";
  368. FieldWidth.toString(os);
  369. os << LM.toString();
  370. os << CS.toString();
  371. }
  372. bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
  373. const char *I,
  374. const char *E,
  375. const LangOptions &LO) {
  376. unsigned argIndex = 0;
  377. // Keep looking for a format specifier until we have exhausted the string.
  378. while (I != E) {
  379. const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
  380. LO);
  381. // Did a fail-stop error of any kind occur when parsing the specifier?
  382. // If so, don't do any more processing.
  383. if (FSR.shouldStop())
  384. return true;;
  385. // Did we exhaust the string or encounter an error that
  386. // we can recover from?
  387. if (!FSR.hasValue())
  388. continue;
  389. // We have a format specifier. Pass it to the callback.
  390. if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
  391. I - FSR.getStart())) {
  392. return true;
  393. }
  394. }
  395. assert(I == E && "Format string not exhausted");
  396. return false;
  397. }
  398. bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
  399. switch (K) {
  400. case InvalidTy:
  401. llvm_unreachable("ArgTypeResult must be valid");
  402. case UnknownTy:
  403. return true;
  404. case CStrTy:
  405. return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
  406. case WCStrTy:
  407. return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
  408. case PtrToArgTypeResultTy: {
  409. const PointerType *PT = argTy->getAs<PointerType>();
  410. if (!PT)
  411. return false;
  412. return A.matchesType(C, PT->getPointeeType());
  413. }
  414. }
  415. llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
  416. }
  417. QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
  418. switch (K) {
  419. case InvalidTy:
  420. llvm_unreachable("No representative type for Invalid ArgTypeResult");
  421. case UnknownTy:
  422. return QualType();
  423. case CStrTy:
  424. return C.getPointerType(C.CharTy);
  425. case WCStrTy:
  426. return C.getPointerType(C.getWCharType());
  427. case PtrToArgTypeResultTy:
  428. return C.getPointerType(A.getRepresentativeType(C));
  429. }
  430. llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
  431. }
  432. std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
  433. std::string S = getRepresentativeType(C).getAsString();
  434. if (!Name)
  435. return std::string("'") + S + "'";
  436. return std::string("'") + Name + "' (aka '" + S + "')";
  437. }