FormatString.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // Shared details for processing format strings of printf and scanf
  11. // (and friends).
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "FormatStringParsing.h"
  15. using clang::analyze_format_string::ArgTypeResult;
  16. using clang::analyze_format_string::FormatStringHandler;
  17. using clang::analyze_format_string::FormatSpecifier;
  18. using clang::analyze_format_string::LengthModifier;
  19. using clang::analyze_format_string::OptionalAmount;
  20. using clang::analyze_format_string::PositionContext;
  21. using clang::analyze_format_string::ConversionSpecifier;
  22. using namespace clang;
  23. // Key function to FormatStringHandler.
  24. FormatStringHandler::~FormatStringHandler() {}
  25. //===----------------------------------------------------------------------===//
  // Functions for parsing format string components in both printf and
  // scanf format strings.
  28. //===----------------------------------------------------------------------===//
  29. OptionalAmount
  30. clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
  31. const char *I = Beg;
  32. UpdateOnReturn <const char*> UpdateBeg(Beg, I);
  33. unsigned accumulator = 0;
  34. bool hasDigits = false;
  35. for ( ; I != E; ++I) {
  36. char c = *I;
  37. if (c >= '0' && c <= '9') {
  38. hasDigits = true;
  39. accumulator = (accumulator * 10) + (c - '0');
  40. continue;
  41. }
  42. if (hasDigits)
  43. return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
  44. false);
  45. break;
  46. }
  47. return OptionalAmount();
  48. }
  49. OptionalAmount
  50. clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
  51. const char *E,
  52. unsigned &argIndex) {
  53. if (*Beg == '*') {
  54. ++Beg;
  55. return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
  56. }
  57. return ParseAmount(Beg, E);
  58. }
  59. OptionalAmount
  60. clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
  61. const char *Start,
  62. const char *&Beg,
  63. const char *E,
  64. PositionContext p) {
  65. if (*Beg == '*') {
  66. const char *I = Beg + 1;
  67. const OptionalAmount &Amt = ParseAmount(I, E);
  68. if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
  69. H.HandleInvalidPosition(Beg, I - Beg, p);
  70. return OptionalAmount(false);
  71. }
  72. if (I == E) {
  73. // No more characters left?
  74. H.HandleIncompleteSpecifier(Start, E - Start);
  75. return OptionalAmount(false);
  76. }
  77. assert(Amt.getHowSpecified() == OptionalAmount::Constant);
  78. if (*I == '$') {
  79. // Handle positional arguments
  80. // Special case: '*0$', since this is an easy mistake.
  81. if (Amt.getConstantAmount() == 0) {
  82. H.HandleZeroPosition(Beg, I - Beg + 1);
  83. return OptionalAmount(false);
  84. }
  85. const char *Tmp = Beg;
  86. Beg = ++I;
  87. return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
  88. Tmp, 0, true);
  89. }
  90. H.HandleInvalidPosition(Beg, I - Beg, p);
  91. return OptionalAmount(false);
  92. }
  93. return ParseAmount(Beg, E);
  94. }
  95. bool
  96. clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
  97. FormatSpecifier &CS,
  98. const char *Start,
  99. const char *&Beg, const char *E,
  100. unsigned *argIndex) {
  101. // FIXME: Support negative field widths.
  102. if (argIndex) {
  103. CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
  104. }
  105. else {
  106. const OptionalAmount Amt =
  107. ParsePositionAmount(H, Start, Beg, E,
  108. analyze_format_string::FieldWidthPos);
  109. if (Amt.isInvalid())
  110. return true;
  111. CS.setFieldWidth(Amt);
  112. }
  113. return false;
  114. }
  115. bool
  116. clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
  117. FormatSpecifier &FS,
  118. const char *Start,
  119. const char *&Beg,
  120. const char *E) {
  121. const char *I = Beg;
  122. const OptionalAmount &Amt = ParseAmount(I, E);
  123. if (I == E) {
  124. // No more characters left?
  125. H.HandleIncompleteSpecifier(Start, E - Start);
  126. return true;
  127. }
  128. if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
  129. // Special case: '%0$', since this is an easy mistake.
  130. if (Amt.getConstantAmount() == 0) {
  131. H.HandleZeroPosition(Start, I - Start);
  132. return true;
  133. }
  134. FS.setArgIndex(Amt.getConstantAmount() - 1);
  135. FS.setUsesPositionalArg();
  136. // Update the caller's pointer if we decided to consume
  137. // these characters.
  138. Beg = I;
  139. return false;
  140. }
  141. return false;
  142. }
  143. bool
  144. clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
  145. const char *&I,
  146. const char *E) {
  147. LengthModifier::Kind lmKind = LengthModifier::None;
  148. const char *lmPosition = I;
  149. switch (*I) {
  150. default:
  151. return false;
  152. case 'h':
  153. ++I;
  154. lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar)
  155. : LengthModifier::AsShort;
  156. break;
  157. case 'l':
  158. ++I;
  159. lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong)
  160. : LengthModifier::AsLong;
  161. break;
  162. case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
  163. case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
  164. case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break;
  165. case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
  166. case 'q': lmKind = LengthModifier::AsLongLong; ++I; break;
  167. }
  168. LengthModifier lm(lmPosition, lmKind);
  169. FS.setLengthModifier(lm);
  170. return true;
  171. }
  172. //===----------------------------------------------------------------------===//
  173. // Methods on ArgTypeResult.
  174. //===----------------------------------------------------------------------===//
  175. bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
  176. switch (K) {
  177. case InvalidTy:
  178. llvm_unreachable("ArgTypeResult must be valid");
  179. case UnknownTy:
  180. return true;
  181. case AnyCharTy: {
  182. if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
  183. switch (BT->getKind()) {
  184. default:
  185. break;
  186. case BuiltinType::Char_S:
  187. case BuiltinType::SChar:
  188. case BuiltinType::UChar:
  189. case BuiltinType::Char_U:
  190. return true;
  191. }
  192. return false;
  193. }
  194. case SpecificTy: {
  195. argTy = C.getCanonicalType(argTy).getUnqualifiedType();
  196. if (T == argTy)
  197. return true;
  198. // Check for "compatible types".
  199. if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
  200. switch (BT->getKind()) {
  201. default:
  202. break;
  203. case BuiltinType::Char_S:
  204. case BuiltinType::SChar:
  205. return T == C.UnsignedCharTy;
  206. case BuiltinType::Char_U:
  207. case BuiltinType::UChar:
  208. return T == C.SignedCharTy;
  209. case BuiltinType::Short:
  210. return T == C.UnsignedShortTy;
  211. case BuiltinType::UShort:
  212. return T == C.ShortTy;
  213. case BuiltinType::Int:
  214. return T == C.UnsignedIntTy;
  215. case BuiltinType::UInt:
  216. return T == C.IntTy;
  217. case BuiltinType::Long:
  218. return T == C.UnsignedLongTy;
  219. case BuiltinType::ULong:
  220. return T == C.LongTy;
  221. case BuiltinType::LongLong:
  222. return T == C.UnsignedLongLongTy;
  223. case BuiltinType::ULongLong:
  224. return T == C.LongLongTy;
  225. }
  226. return false;
  227. }
  228. case CStrTy: {
  229. const PointerType *PT = argTy->getAs<PointerType>();
  230. if (!PT)
  231. return false;
  232. QualType pointeeTy = PT->getPointeeType();
  233. if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
  234. switch (BT->getKind()) {
  235. case BuiltinType::Void:
  236. case BuiltinType::Char_U:
  237. case BuiltinType::UChar:
  238. case BuiltinType::Char_S:
  239. case BuiltinType::SChar:
  240. return true;
  241. default:
  242. break;
  243. }
  244. return false;
  245. }
  246. case WCStrTy: {
  247. const PointerType *PT = argTy->getAs<PointerType>();
  248. if (!PT)
  249. return false;
  250. QualType pointeeTy =
  251. C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
  252. return pointeeTy == C.getWCharType();
  253. }
  254. case WIntTy: {
  255. // Instead of doing a lookup for the definition of 'wint_t' (which
  256. // is defined by the system headers) instead see if wchar_t and
  257. // the argument type promote to the same type.
  258. QualType PromoWChar =
  259. C.getWCharType()->isPromotableIntegerType()
  260. ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType();
  261. QualType PromoArg =
  262. argTy->isPromotableIntegerType()
  263. ? C.getPromotedIntegerType(argTy) : argTy;
  264. PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType();
  265. PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();
  266. return PromoWChar == PromoArg;
  267. }
  268. case CPointerTy:
  269. return argTy->isPointerType() || argTy->isObjCObjectPointerType() ||
  270. argTy->isNullPtrType();
  271. case ObjCPointerTy:
  272. return argTy->getAs<ObjCObjectPointerType>() != NULL;
  273. }
  274. // FIXME: Should be unreachable, but Clang is currently emitting
  275. // a warning.
  276. return false;
  277. }
  278. QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
  279. switch (K) {
  280. case InvalidTy:
  281. llvm_unreachable("No representative type for Invalid ArgTypeResult");
  282. case UnknownTy:
  283. return QualType();
  284. case AnyCharTy:
  285. return C.CharTy;
  286. case SpecificTy:
  287. return T;
  288. case CStrTy:
  289. return C.getPointerType(C.CharTy);
  290. case WCStrTy:
  291. return C.getPointerType(C.getWCharType());
  292. case ObjCPointerTy:
  293. return C.ObjCBuiltinIdTy;
  294. case CPointerTy:
  295. return C.VoidPtrTy;
  296. case WIntTy: {
  297. QualType WC = C.getWCharType();
  298. return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC;
  299. }
  300. }
  301. // FIXME: Should be unreachable, but Clang is currently emitting
  302. // a warning.
  303. return QualType();
  304. }
  305. std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const {
  306. std::string S = getRepresentativeType(C).getAsString();
  307. if (Name)
  308. return std::string("'") + Name + "' (aka '" + S + "')";
  309. return std::string("'") + S + "'";
  310. }
  311. //===----------------------------------------------------------------------===//
  312. // Methods on OptionalAmount.
  313. //===----------------------------------------------------------------------===//
  314. ArgTypeResult
  315. analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
  316. return Ctx.IntTy;
  317. }
  318. //===----------------------------------------------------------------------===//
  319. // Methods on LengthModifier.
  320. //===----------------------------------------------------------------------===//
  321. const char *
  322. analyze_format_string::LengthModifier::toString() const {
  323. switch (kind) {
  324. case AsChar:
  325. return "hh";
  326. case AsShort:
  327. return "h";
  328. case AsLong: // or AsWideChar
  329. return "l";
  330. case AsLongLong:
  331. return "ll";
  332. case AsIntMax:
  333. return "j";
  334. case AsSizeT:
  335. return "z";
  336. case AsPtrDiff:
  337. return "t";
  338. case AsLongDouble:
  339. return "L";
  340. case None:
  341. return "";
  342. }
  343. return NULL;
  344. }
  345. //===----------------------------------------------------------------------===//
  346. // Methods on ConversionSpecifier.
  347. //===----------------------------------------------------------------------===//
  348. const char *ConversionSpecifier::toString() const {
  349. switch (kind) {
  350. case dArg: return "d";
  351. case iArg: return "i";
  352. case oArg: return "o";
  353. case uArg: return "u";
  354. case xArg: return "x";
  355. case XArg: return "X";
  356. case fArg: return "f";
  357. case FArg: return "F";
  358. case eArg: return "e";
  359. case EArg: return "E";
  360. case gArg: return "g";
  361. case GArg: return "G";
  362. case aArg: return "a";
  363. case AArg: return "A";
  364. case cArg: return "c";
  365. case sArg: return "s";
  366. case pArg: return "p";
  367. case nArg: return "n";
  368. case PercentArg: return "%";
  369. case ScanListArg: return "[";
  370. case InvalidSpecifier: return NULL;
  371. // MacOS X unicode extensions.
  372. case CArg: return "C";
  373. case SArg: return "S";
  374. // Objective-C specific specifiers.
  375. case ObjCObjArg: return "@";
  376. // GlibC specific specifiers.
  377. case PrintErrno: return "m";
  378. }
  379. return NULL;
  380. }
  381. //===----------------------------------------------------------------------===//
  382. // Methods on OptionalAmount.
  383. //===----------------------------------------------------------------------===//
  384. void OptionalAmount::toString(raw_ostream &os) const {
  385. switch (hs) {
  386. case Invalid:
  387. case NotSpecified:
  388. return;
  389. case Arg:
  390. if (UsesDotPrefix)
  391. os << ".";
  392. if (usesPositionalArg())
  393. os << "*" << getPositionalArgIndex() << "$";
  394. else
  395. os << "*";
  396. break;
  397. case Constant:
  398. if (UsesDotPrefix)
  399. os << ".";
  400. os << amt;
  401. break;
  402. }
  403. }
  404. bool FormatSpecifier::hasValidLengthModifier() const {
  405. switch (LM.getKind()) {
  406. case LengthModifier::None:
  407. return true;
  408. // Handle most integer flags
  409. case LengthModifier::AsChar:
  410. case LengthModifier::AsShort:
  411. case LengthModifier::AsLongLong:
  412. case LengthModifier::AsIntMax:
  413. case LengthModifier::AsSizeT:
  414. case LengthModifier::AsPtrDiff:
  415. switch (CS.getKind()) {
  416. case ConversionSpecifier::dArg:
  417. case ConversionSpecifier::iArg:
  418. case ConversionSpecifier::oArg:
  419. case ConversionSpecifier::uArg:
  420. case ConversionSpecifier::xArg:
  421. case ConversionSpecifier::XArg:
  422. case ConversionSpecifier::nArg:
  423. return true;
  424. default:
  425. return false;
  426. }
  427. // Handle 'l' flag
  428. case LengthModifier::AsLong:
  429. switch (CS.getKind()) {
  430. case ConversionSpecifier::dArg:
  431. case ConversionSpecifier::iArg:
  432. case ConversionSpecifier::oArg:
  433. case ConversionSpecifier::uArg:
  434. case ConversionSpecifier::xArg:
  435. case ConversionSpecifier::XArg:
  436. case ConversionSpecifier::aArg:
  437. case ConversionSpecifier::AArg:
  438. case ConversionSpecifier::fArg:
  439. case ConversionSpecifier::FArg:
  440. case ConversionSpecifier::eArg:
  441. case ConversionSpecifier::EArg:
  442. case ConversionSpecifier::gArg:
  443. case ConversionSpecifier::GArg:
  444. case ConversionSpecifier::nArg:
  445. case ConversionSpecifier::cArg:
  446. case ConversionSpecifier::sArg:
  447. return true;
  448. default:
  449. return false;
  450. }
  451. case LengthModifier::AsLongDouble:
  452. switch (CS.getKind()) {
  453. case ConversionSpecifier::aArg:
  454. case ConversionSpecifier::AArg:
  455. case ConversionSpecifier::fArg:
  456. case ConversionSpecifier::FArg:
  457. case ConversionSpecifier::eArg:
  458. case ConversionSpecifier::EArg:
  459. case ConversionSpecifier::gArg:
  460. case ConversionSpecifier::GArg:
  461. return true;
  462. default:
  463. return false;
  464. }
  465. }
  466. return false;
  467. }