ReservoirSamplerTest.cpp 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. //===- ReservoirSamplerTest.cpp - Tests for the ReservoirSampler ----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/FuzzMutate/Random.h"
  9. #include "gtest/gtest.h"
  10. #include <random>
  11. using namespace llvm;
  12. TEST(ReservoirSamplerTest, OneItem) {
  13. std::mt19937 Rand;
  14. auto Sampler = makeSampler(Rand, 7, 1);
  15. ASSERT_FALSE(Sampler.isEmpty());
  16. ASSERT_EQ(7, Sampler.getSelection());
  17. }
  18. TEST(ReservoirSamplerTest, NoWeight) {
  19. std::mt19937 Rand;
  20. auto Sampler = makeSampler(Rand, 7, 0);
  21. ASSERT_TRUE(Sampler.isEmpty());
  22. }
  23. TEST(ReservoirSamplerTest, Uniform) {
  24. std::mt19937 Rand;
  25. // Run three chi-squared tests to check that the distribution is reasonably
  26. // uniform.
  27. std::vector<int> Items = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  28. int Failures = 0;
  29. for (int Run = 0; Run < 3; ++Run) {
  30. std::vector<int> Counts(Items.size(), 0);
  31. // We need $np_s > 5$ at minimum, but we're better off going a couple of
  32. // orders of magnitude larger.
  33. int N = Items.size() * 5 * 100;
  34. for (int I = 0; I < N; ++I) {
  35. auto Sampler = makeSampler(Rand, Items);
  36. Counts[Sampler.getSelection()] += 1;
  37. }
  38. // Knuth. TAOCP Vol. 2, 3.3.1 (8):
  39. // $V = \frac{1}{n} \sum_{s=1}^{k} \left(\frac{Y_s^2}{p_s}\right) - n$
  40. double Ps = 1.0 / Items.size();
  41. double Sum = 0.0;
  42. for (int Ys : Counts)
  43. Sum += Ys * Ys / Ps;
  44. double V = (Sum / N) - N;
  45. assert(Items.size() == 10 && "Our chi-squared values assume 10 items");
  46. // Since we have 10 items, there are 9 degrees of freedom and the table of
  47. // chi-squared values is as follows:
  48. //
  49. // | p=1% | 5% | 25% | 50% | 75% | 95% | 99% |
  50. // v=9 | 2.088 | 3.325 | 5.899 | 8.343 | 11.39 | 16.92 | 21.67 |
  51. //
  52. // Check that we're in the likely range of results.
  53. //if (V < 2.088 || V > 21.67)
  54. if (V < 2.088 || V > 21.67)
  55. ++Failures;
  56. }
  57. EXPECT_LT(Failures, 3) << "Non-uniform distribution?";
  58. }