TrigramIndexTest.cpp 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
//===- TrigramIndexTest.cpp - Unit tests for TrigramIndex -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
  8. #include "llvm/Support/TrigramIndex.h"
  9. #include "llvm/ADT/STLExtras.h"
  10. #include "gtest/gtest.h"
  11. #include <string>
  12. #include <vector>
  13. using namespace llvm;
  14. namespace {
  15. class TrigramIndexTest : public ::testing::Test {
  16. protected:
  17. std::unique_ptr<TrigramIndex> makeTrigramIndex(
  18. std::vector<std::string> Rules) {
  19. std::unique_ptr<TrigramIndex> TI =
  20. std::make_unique<TrigramIndex>();
  21. for (auto &Rule : Rules)
  22. TI->insert(Rule);
  23. return TI;
  24. }
  25. };
  26. TEST_F(TrigramIndexTest, Empty) {
  27. std::unique_ptr<TrigramIndex> TI =
  28. makeTrigramIndex({});
  29. EXPECT_FALSE(TI->isDefeated());
  30. EXPECT_TRUE(TI->isDefinitelyOut("foo"));
  31. }
  32. TEST_F(TrigramIndexTest, Basic) {
  33. std::unique_ptr<TrigramIndex> TI =
  34. makeTrigramIndex({"*hello*", "*wor.d*"});
  35. EXPECT_FALSE(TI->isDefeated());
  36. EXPECT_TRUE(TI->isDefinitelyOut("foo"));
  37. }
  38. TEST_F(TrigramIndexTest, NoTrigramsInRules) {
  39. std::unique_ptr<TrigramIndex> TI =
  40. makeTrigramIndex({"b.r", "za*az"});
  41. EXPECT_TRUE(TI->isDefeated());
  42. EXPECT_FALSE(TI->isDefinitelyOut("foo"));
  43. EXPECT_FALSE(TI->isDefinitelyOut("bar"));
  44. EXPECT_FALSE(TI->isDefinitelyOut("zakaz"));
  45. }
  46. TEST_F(TrigramIndexTest, NoTrigramsInARule) {
  47. std::unique_ptr<TrigramIndex> TI =
  48. makeTrigramIndex({"*hello*", "*wo.ld*"});
  49. EXPECT_TRUE(TI->isDefeated());
  50. EXPECT_FALSE(TI->isDefinitelyOut("foo"));
  51. }
  52. TEST_F(TrigramIndexTest, RepetitiveRule) {
  53. std::unique_ptr<TrigramIndex> TI =
  54. makeTrigramIndex({"*bar*bar*bar*bar*bar", "bar*bar"});
  55. EXPECT_FALSE(TI->isDefeated());
  56. EXPECT_TRUE(TI->isDefinitelyOut("foo"));
  57. EXPECT_TRUE(TI->isDefinitelyOut("bar"));
  58. EXPECT_FALSE(TI->isDefinitelyOut("barbara"));
  59. EXPECT_FALSE(TI->isDefinitelyOut("bar+bar"));
  60. }
  61. TEST_F(TrigramIndexTest, PopularTrigram) {
  62. std::unique_ptr<TrigramIndex> TI =
  63. makeTrigramIndex({"*aaa*", "*aaaa*", "*aaaaa*", "*aaaaa*", "*aaaaaa*"});
  64. EXPECT_TRUE(TI->isDefeated());
  65. }
  66. TEST_F(TrigramIndexTest, PopularTrigram2) {
  67. std::unique_ptr<TrigramIndex> TI =
  68. makeTrigramIndex({"class1.h", "class2.h", "class3.h", "class4.h", "class.h"});
  69. EXPECT_TRUE(TI->isDefeated());
  70. }
  71. TEST_F(TrigramIndexTest, TooComplicatedRegex) {
  72. std::unique_ptr<TrigramIndex> TI =
  73. makeTrigramIndex({"[0-9]+"});
  74. EXPECT_TRUE(TI->isDefeated());
  75. }
  76. TEST_F(TrigramIndexTest, TooComplicatedRegex2) {
  77. std::unique_ptr<TrigramIndex> TI =
  78. makeTrigramIndex({"foo|bar"});
  79. EXPECT_TRUE(TI->isDefeated());
  80. }
  81. TEST_F(TrigramIndexTest, EscapedSymbols) {
  82. std::unique_ptr<TrigramIndex> TI =
  83. makeTrigramIndex({"*c\\+\\+*", "*hello\\\\world*", "a\\tb", "a\\0b"});
  84. EXPECT_FALSE(TI->isDefeated());
  85. EXPECT_FALSE(TI->isDefinitelyOut("c++"));
  86. EXPECT_TRUE(TI->isDefinitelyOut("c\\+\\+"));
  87. EXPECT_FALSE(TI->isDefinitelyOut("hello\\world"));
  88. EXPECT_TRUE(TI->isDefinitelyOut("hello\\\\world"));
  89. EXPECT_FALSE(TI->isDefinitelyOut("atb"));
  90. EXPECT_TRUE(TI->isDefinitelyOut("a\\tb"));
  91. EXPECT_TRUE(TI->isDefinitelyOut("a\tb"));
  92. EXPECT_FALSE(TI->isDefinitelyOut("a0b"));
  93. }
  94. TEST_F(TrigramIndexTest, Backreference1) {
  95. std::unique_ptr<TrigramIndex> TI =
  96. makeTrigramIndex({"*foo\\1*"});
  97. EXPECT_TRUE(TI->isDefeated());
  98. }
  99. TEST_F(TrigramIndexTest, Backreference2) {
  100. std::unique_ptr<TrigramIndex> TI =
  101. makeTrigramIndex({"*foo\\2*"});
  102. EXPECT_TRUE(TI->isDefeated());
  103. }
  104. TEST_F(TrigramIndexTest, Sequence) {
  105. std::unique_ptr<TrigramIndex> TI =
  106. makeTrigramIndex({"class1.h", "class2.h", "class3.h", "class4.h"});
  107. EXPECT_FALSE(TI->isDefeated());
  108. EXPECT_FALSE(TI->isDefinitelyOut("class1"));
  109. EXPECT_TRUE(TI->isDefinitelyOut("class.h"));
  110. EXPECT_TRUE(TI->isDefinitelyOut("class"));
  111. }
  112. } // namespace