fuzzing.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. // -*- C++ -*-
  2. //===------------------------- fuzzing.cpp -------------------------------===//
  3. //
  4. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  5. // See https://llvm.org/LICENSE.txt for license information.
  6. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7. //
  8. //===----------------------------------------------------------------------===//
  9. // A set of routines to use when fuzzing the algorithms in libc++
  10. // Each one tests a single algorithm.
  11. //
  12. // They all have the form of:
  13. // int `algorithm`(const uint8_t *data, size_t size);
  14. //
  15. // They perform the operation, and then check to see if the results are correct.
  16. // If so, they return zero, and non-zero otherwise.
  17. //
  18. // For example, sort calls std::sort, then checks two things:
  19. // (1) The resulting vector is sorted
  20. // (2) The resulting vector contains the same elements as the original data.
  21. #include "fuzzing.h"
  22. #include <vector>
  23. #include <algorithm>
  24. #include <functional>
  25. #include <regex>
  26. #include <cassert>
  27. #include <iostream>
  28. // If we had C++14, we could use the four iterator version of is_permutation and equal
  29. namespace fuzzing {
  30. // This is a struct we can use to test the stable_XXX algorithms.
  31. // perform the operation on the key, then check the order of the payload.
  32. struct stable_test {
  33. uint8_t key;
  34. size_t payload;
  35. stable_test(uint8_t k) : key(k), payload(0) {}
  36. stable_test(uint8_t k, size_t p) : key(k), payload(p) {}
  37. };
  38. void swap(stable_test &lhs, stable_test &rhs)
  39. {
  40. using std::swap;
  41. swap(lhs.key, rhs.key);
  42. swap(lhs.payload, rhs.payload);
  43. }
  44. struct key_less
  45. {
  46. bool operator () (const stable_test &lhs, const stable_test &rhs) const
  47. {
  48. return lhs.key < rhs.key;
  49. }
  50. };
  51. struct payload_less
  52. {
  53. bool operator () (const stable_test &lhs, const stable_test &rhs) const
  54. {
  55. return lhs.payload < rhs.payload;
  56. }
  57. };
  58. struct total_less
  59. {
  60. bool operator () (const stable_test &lhs, const stable_test &rhs) const
  61. {
  62. return lhs.key == rhs.key ? lhs.payload < rhs.payload : lhs.key < rhs.key;
  63. }
  64. };
  65. bool operator==(const stable_test &lhs, const stable_test &rhs)
  66. {
  67. return lhs.key == rhs.key && lhs.payload == rhs.payload;
  68. }
  69. template<typename T>
  70. struct is_even
  71. {
  72. bool operator () (const T &t) const
  73. {
  74. return t % 2 == 0;
  75. }
  76. };
  77. template<>
  78. struct is_even<stable_test>
  79. {
  80. bool operator () (const stable_test &t) const
  81. {
  82. return t.key % 2 == 0;
  83. }
  84. };
  85. typedef std::vector<uint8_t> Vec;
  86. typedef std::vector<stable_test> StableVec;
  87. typedef StableVec::const_iterator SVIter;
  88. // Cheap version of is_permutation
  89. // Builds a set of buckets for each of the key values.
  90. // Sums all the payloads.
  91. // Not 100% perfect, but _way_ faster
  92. bool is_permutation(SVIter first1, SVIter last1, SVIter first2)
  93. {
  94. size_t xBuckets[256] = {0};
  95. size_t xPayloads[256] = {0};
  96. size_t yBuckets[256] = {0};
  97. size_t yPayloads[256] = {0};
  98. for (; first1 != last1; ++first1, ++first2)
  99. {
  100. xBuckets [first1->key]++;
  101. xPayloads[first1->key] += first1->payload;
  102. yBuckets [first2->key]++;
  103. yPayloads[first2->key] += first2->payload;
  104. }
  105. for (size_t i = 0; i < 256; ++i)
  106. {
  107. if (xBuckets[i] != yBuckets[i])
  108. return false;
  109. if (xPayloads[i] != yPayloads[i])
  110. return false;
  111. }
  112. return true;
  113. }
  114. template <typename Iter1, typename Iter2>
  115. bool is_permutation(Iter1 first1, Iter1 last1, Iter2 first2)
  116. {
  117. static_assert((std::is_same<typename std::iterator_traits<Iter1>::value_type, uint8_t>::value), "");
  118. static_assert((std::is_same<typename std::iterator_traits<Iter2>::value_type, uint8_t>::value), "");
  119. size_t xBuckets[256] = {0};
  120. size_t yBuckets[256] = {0};
  121. for (; first1 != last1; ++first1, ++first2)
  122. {
  123. xBuckets [*first1]++;
  124. yBuckets [*first2]++;
  125. }
  126. for (size_t i = 0; i < 256; ++i)
  127. if (xBuckets[i] != yBuckets[i])
  128. return false;
  129. return true;
  130. }
  131. // == sort ==
  132. int sort(const uint8_t *data, size_t size)
  133. {
  134. Vec working(data, data + size);
  135. std::sort(working.begin(), working.end());
  136. if (!std::is_sorted(working.begin(), working.end())) return 1;
  137. if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99;
  138. return 0;
  139. }
  140. // == stable_sort ==
  141. int stable_sort(const uint8_t *data, size_t size)
  142. {
  143. StableVec input;
  144. for (size_t i = 0; i < size; ++i)
  145. input.push_back(stable_test(data[i], i));
  146. StableVec working = input;
  147. std::stable_sort(working.begin(), working.end(), key_less());
  148. if (!std::is_sorted(working.begin(), working.end(), key_less())) return 1;
  149. auto iter = working.begin();
  150. while (iter != working.end())
  151. {
  152. auto range = std::equal_range(iter, working.end(), *iter, key_less());
  153. if (!std::is_sorted(range.first, range.second, total_less())) return 2;
  154. iter = range.second;
  155. }
  156. if (!fuzzing::is_permutation(input.cbegin(), input.cend(), working.cbegin())) return 99;
  157. return 0;
  158. }
  159. // == partition ==
  160. int partition(const uint8_t *data, size_t size)
  161. {
  162. Vec working(data, data + size);
  163. auto iter = std::partition(working.begin(), working.end(), is_even<uint8_t>());
  164. if (!std::all_of (working.begin(), iter, is_even<uint8_t>())) return 1;
  165. if (!std::none_of(iter, working.end(), is_even<uint8_t>())) return 2;
  166. if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99;
  167. return 0;
  168. }
  169. // == partition_copy ==
  170. int partition_copy(const uint8_t *data, size_t size)
  171. {
  172. Vec v1, v2;
  173. auto iter = std::partition_copy(data, data + size,
  174. std::back_inserter<Vec>(v1), std::back_inserter<Vec>(v2),
  175. is_even<uint8_t>());
  176. // The two vectors should add up to the original size
  177. if (v1.size() + v2.size() != size) return 1;
  178. // All of the even values should be in the first vector, and none in the second
  179. if (!std::all_of (v1.begin(), v1.end(), is_even<uint8_t>())) return 2;
  180. if (!std::none_of(v2.begin(), v2.end(), is_even<uint8_t>())) return 3;
  181. // Every value in both vectors has to be in the original
  182. // Make a copy of the input, and sort it
  183. Vec v0{data, data + size};
  184. std::sort(v0.begin(), v0.end());
  185. // Sort each vector and ensure that all of the elements appear in the original input
  186. std::sort(v1.begin(), v1.end());
  187. if (!std::includes(v0.begin(), v0.end(), v1.begin(), v1.end())) return 4;
  188. std::sort(v2.begin(), v2.end());
  189. if (!std::includes(v0.begin(), v0.end(), v2.begin(), v2.end())) return 5;
  190. // This, while simple, is really slow - 20 seconds on a 500K element input.
  191. // for (auto v: v1)
  192. // if (std::find(data, data + size, v) == data + size) return 4;
  193. //
  194. // for (auto v: v2)
  195. // if (std::find(data, data + size, v) == data + size) return 5;
  196. return 0;
  197. }
  198. // == stable_partition ==
  199. int stable_partition (const uint8_t *data, size_t size)
  200. {
  201. StableVec input;
  202. for (size_t i = 0; i < size; ++i)
  203. input.push_back(stable_test(data[i], i));
  204. StableVec working = input;
  205. auto iter = std::stable_partition(working.begin(), working.end(), is_even<stable_test>());
  206. if (!std::all_of (working.begin(), iter, is_even<stable_test>())) return 1;
  207. if (!std::none_of(iter, working.end(), is_even<stable_test>())) return 2;
  208. if (!std::is_sorted(working.begin(), iter, payload_less())) return 3;
  209. if (!std::is_sorted(iter, working.end(), payload_less())) return 4;
  210. if (!fuzzing::is_permutation(input.cbegin(), input.cend(), working.cbegin())) return 99;
  211. return 0;
  212. }
  213. // == nth_element ==
  214. // use the first element as a position into the data
  215. int nth_element (const uint8_t *data, size_t size)
  216. {
  217. if (size <= 1) return 0;
  218. const size_t partition_point = data[0] % size;
  219. Vec working(data + 1, data + size);
  220. const auto partition_iter = working.begin() + partition_point;
  221. std::nth_element(working.begin(), partition_iter, working.end());
  222. // nth may be the end iterator, in this case nth_element has no effect.
  223. if (partition_iter == working.end())
  224. {
  225. if (!std::equal(data + 1, data + size, working.begin())) return 98;
  226. }
  227. else
  228. {
  229. const uint8_t nth = *partition_iter;
  230. if (!std::all_of(working.begin(), partition_iter, [=](uint8_t v) { return v <= nth; }))
  231. return 1;
  232. if (!std::all_of(partition_iter, working.end(), [=](uint8_t v) { return v >= nth; }))
  233. return 2;
  234. if (!fuzzing::is_permutation(data + 1, data + size, working.cbegin())) return 99;
  235. }
  236. return 0;
  237. }
  238. // == partial_sort ==
  239. // use the first element as a position into the data
  240. int partial_sort (const uint8_t *data, size_t size)
  241. {
  242. if (size <= 1) return 0;
  243. const size_t sort_point = data[0] % size;
  244. Vec working(data + 1, data + size);
  245. const auto sort_iter = working.begin() + sort_point;
  246. std::partial_sort(working.begin(), sort_iter, working.end());
  247. if (sort_iter != working.end())
  248. {
  249. const uint8_t nth = *std::min_element(sort_iter, working.end());
  250. if (!std::all_of(working.begin(), sort_iter, [=](uint8_t v) { return v <= nth; }))
  251. return 1;
  252. if (!std::all_of(sort_iter, working.end(), [=](uint8_t v) { return v >= nth; }))
  253. return 2;
  254. }
  255. if (!std::is_sorted(working.begin(), sort_iter)) return 3;
  256. if (!fuzzing::is_permutation(data + 1, data + size, working.cbegin())) return 99;
  257. return 0;
  258. }
  259. // == partial_sort_copy ==
  260. // use the first element as a count
  261. int partial_sort_copy (const uint8_t *data, size_t size)
  262. {
  263. if (size <= 1) return 0;
  264. const size_t num_results = data[0] % size;
  265. Vec results(num_results);
  266. (void) std::partial_sort_copy(data + 1, data + size, results.begin(), results.end());
  267. // The results have to be sorted
  268. if (!std::is_sorted(results.begin(), results.end())) return 1;
  269. // All the values in results have to be in the original data
  270. for (auto v: results)
  271. if (std::find(data + 1, data + size, v) == data + size) return 2;
  272. // The things in results have to be the smallest N in the original data
  273. Vec sorted(data + 1, data + size);
  274. std::sort(sorted.begin(), sorted.end());
  275. if (!std::equal(results.begin(), results.end(), sorted.begin())) return 3;
  276. return 0;
  277. }
  278. // The second sequence has been "uniqued"
  279. template <typename Iter1, typename Iter2>
  280. static bool compare_unique(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2)
  281. {
  282. assert(first1 != last1 && first2 != last2);
  283. if (*first1 != *first2) return false;
  284. uint8_t last_value = *first1;
  285. ++first1; ++first2;
  286. while(first1 != last1 && first2 != last2)
  287. {
  288. // Skip over dups in the first sequence
  289. while (*first1 == last_value)
  290. if (++first1 == last1) return false;
  291. if (*first1 != *first2) return false;
  292. last_value = *first1;
  293. ++first1; ++first2;
  294. }
  295. // Still stuff left in the 'uniqued' sequence - oops
  296. if (first1 == last1 && first2 != last2) return false;
  297. // Still stuff left in the original sequence - better be all the same
  298. while (first1 != last1)
  299. {
  300. if (*first1 != last_value) return false;
  301. ++first1;
  302. }
  303. return true;
  304. }
  305. // == unique ==
  306. int unique (const uint8_t *data, size_t size)
  307. {
  308. Vec working(data, data + size);
  309. std::sort(working.begin(), working.end());
  310. Vec results = working;
  311. Vec::iterator new_end = std::unique(results.begin(), results.end());
  312. Vec::iterator it; // scratch iterator
  313. // Check the size of the unique'd sequence.
  314. // it should only be zero if the input sequence was empty.
  315. if (results.begin() == new_end)
  316. return working.size() == 0 ? 0 : 1;
  317. // 'results' is sorted
  318. if (!std::is_sorted(results.begin(), new_end)) return 2;
  319. // All the elements in 'results' must be different
  320. it = results.begin();
  321. uint8_t prev_value = *it++;
  322. for (; it != new_end; ++it)
  323. {
  324. if (*it == prev_value) return 3;
  325. prev_value = *it;
  326. }
  327. // Every element in 'results' must be in 'working'
  328. for (it = results.begin(); it != new_end; ++it)
  329. if (std::find(working.begin(), working.end(), *it) == working.end())
  330. return 4;
  331. // Every element in 'working' must be in 'results'
  332. for (auto v : working)
  333. if (std::find(results.begin(), new_end, v) == new_end)
  334. return 5;
  335. return 0;
  336. }
  337. // == unique_copy ==
  338. int unique_copy (const uint8_t *data, size_t size)
  339. {
  340. Vec working(data, data + size);
  341. std::sort(working.begin(), working.end());
  342. Vec results;
  343. (void) std::unique_copy(working.begin(), working.end(),
  344. std::back_inserter<Vec>(results));
  345. Vec::iterator it; // scratch iterator
  346. // Check the size of the unique'd sequence.
  347. // it should only be zero if the input sequence was empty.
  348. if (results.size() == 0)
  349. return working.size() == 0 ? 0 : 1;
  350. // 'results' is sorted
  351. if (!std::is_sorted(results.begin(), results.end())) return 2;
  352. // All the elements in 'results' must be different
  353. it = results.begin();
  354. uint8_t prev_value = *it++;
  355. for (; it != results.end(); ++it)
  356. {
  357. if (*it == prev_value) return 3;
  358. prev_value = *it;
  359. }
  360. // Every element in 'results' must be in 'working'
  361. for (auto v : results)
  362. if (std::find(working.begin(), working.end(), v) == working.end())
  363. return 4;
  364. // Every element in 'working' must be in 'results'
  365. for (auto v : working)
  366. if (std::find(results.begin(), results.end(), v) == results.end())
  367. return 5;
  368. return 0;
  369. }
  370. // -- regex fuzzers
  371. static int regex_helper(const uint8_t *data, size_t size, std::regex::flag_type flag)
  372. {
  373. if (size > 0)
  374. {
  375. try
  376. {
  377. std::string s((const char *)data, size);
  378. std::regex re(s, flag);
  379. return std::regex_match(s, re) ? 1 : 0;
  380. }
  381. catch (std::regex_error &ex) {}
  382. }
  383. return 0;
  384. }
  385. int regex_ECMAScript (const uint8_t *data, size_t size)
  386. {
  387. (void) regex_helper(data, size, std::regex_constants::ECMAScript);
  388. return 0;
  389. }
  390. int regex_POSIX (const uint8_t *data, size_t size)
  391. {
  392. (void) regex_helper(data, size, std::regex_constants::basic);
  393. return 0;
  394. }
  395. int regex_extended (const uint8_t *data, size_t size)
  396. {
  397. (void) regex_helper(data, size, std::regex_constants::extended);
  398. return 0;
  399. }
  400. int regex_awk (const uint8_t *data, size_t size)
  401. {
  402. (void) regex_helper(data, size, std::regex_constants::awk);
  403. return 0;
  404. }
  405. int regex_grep (const uint8_t *data, size_t size)
  406. {
  407. (void) regex_helper(data, size, std::regex_constants::grep);
  408. return 0;
  409. }
  410. int regex_egrep (const uint8_t *data, size_t size)
  411. {
  412. (void) regex_helper(data, size, std::regex_constants::egrep);
  413. return 0;
  414. }
  415. // -- heap fuzzers
  416. int make_heap (const uint8_t *data, size_t size)
  417. {
  418. Vec working(data, data + size);
  419. std::make_heap(working.begin(), working.end());
  420. if (!std::is_heap(working.begin(), working.end())) return 1;
  421. if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99;
  422. return 0;
  423. }
  424. int push_heap (const uint8_t *data, size_t size)
  425. {
  426. if (size < 2) return 0;
  427. // Make a heap from the first half of the data
  428. Vec working(data, data + size);
  429. auto iter = working.begin() + (size / 2);
  430. std::make_heap(working.begin(), iter);
  431. if (!std::is_heap(working.begin(), iter)) return 1;
  432. // Now push the rest onto the heap, one at a time
  433. ++iter;
  434. for (; iter != working.end(); ++iter) {
  435. std::push_heap(working.begin(), iter);
  436. if (!std::is_heap(working.begin(), iter)) return 2;
  437. }
  438. if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99;
  439. return 0;
  440. }
  441. int pop_heap (const uint8_t *data, size_t size)
  442. {
  443. if (size < 2) return 0;
  444. Vec working(data, data + size);
  445. std::make_heap(working.begin(), working.end());
  446. // Pop things off, one at a time
  447. auto iter = --working.end();
  448. while (iter != working.begin()) {
  449. std::pop_heap(working.begin(), iter);
  450. if (!std::is_heap(working.begin(), --iter)) return 2;
  451. }
  452. return 0;
  453. }
  454. // -- search fuzzers
  455. int search (const uint8_t *data, size_t size)
  456. {
  457. if (size < 2) return 0;
  458. const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
  459. assert(pat_size <= size - 1);
  460. const uint8_t *pat_begin = data + 1;
  461. const uint8_t *pat_end = pat_begin + pat_size;
  462. const uint8_t *data_end = data + size;
  463. assert(pat_end <= data_end);
  464. // std::cerr << "data[0] = " << size_t(data[0]) << " ";
  465. // std::cerr << "Pattern size = " << pat_size << "; corpus is " << size - 1 << std::endl;
  466. auto it = std::search(pat_end, data_end, pat_begin, pat_end);
  467. if (it != data_end) // not found
  468. if (!std::equal(pat_begin, pat_end, it))
  469. return 1;
  470. return 0;
  471. }
  472. template <typename S>
  473. static int search_helper (const uint8_t *data, size_t size)
  474. {
  475. if (size < 2) return 0;
  476. const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
  477. const uint8_t *pat_begin = data + 1;
  478. const uint8_t *pat_end = pat_begin + pat_size;
  479. const uint8_t *data_end = data + size;
  480. auto it = std::search(pat_end, data_end, S(pat_begin, pat_end));
  481. if (it != data_end) // not found
  482. if (!std::equal(pat_begin, pat_end, it))
  483. return 1;
  484. return 0;
  485. }
  486. // These are still in std::experimental
  487. // int search_boyer_moore (const uint8_t *data, size_t size)
  488. // {
  489. // return search_helper<std::boyer_moore_searcher<const uint8_t *>>(data, size);
  490. // }
  491. //
  492. // int search_boyer_moore_horspool (const uint8_t *data, size_t size)
  493. // {
  494. // return search_helper<std::boyer_moore_horspool_searcher<const uint8_t *>>(data, size);
  495. // }
  496. // -- set operation fuzzers
  497. template <typename S>
  498. static void set_helper (const uint8_t *data, size_t size, Vec &v1, Vec &v2)
  499. {
  500. assert(size > 1);
  501. const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
  502. const uint8_t *pat_begin = data + 1;
  503. const uint8_t *pat_end = pat_begin + pat_size;
  504. const uint8_t *data_end = data + size;
  505. v1.assign(pat_begin, pat_end);
  506. v2.assign(pat_end, data_end);
  507. std::sort(v1.begin(), v1.end());
  508. std::sort(v2.begin(), v2.end());
  509. }
  510. } // namespace fuzzing