StringMap.cpp 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. //===--- StringMap.cpp - String Hash table map implementation -------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file was developed by Chris Lattner and is distributed under
  6. // the University of Illinois Open Source License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the StringMap class.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/ADT/StringMap.h"
  14. #include <cassert>
  15. using namespace llvm;
  16. StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
  17. ItemSize = itemSize;
  18. // If a size is specified, initialize the table with that many buckets.
  19. if (InitSize) {
  20. init(InitSize);
  21. return;
  22. }
  23. // Otherwise, initialize it with zero buckets to avoid the allocation.
  24. TheTable = 0;
  25. NumBuckets = 0;
  26. NumItems = 0;
  27. NumTombstones = 0;
  28. }
  29. void StringMapImpl::init(unsigned InitSize) {
  30. assert((InitSize & (InitSize-1)) == 0 &&
  31. "Init Size must be a power of 2 or zero!");
  32. NumBuckets = InitSize ? InitSize : 16;
  33. NumItems = 0;
  34. NumTombstones = 0;
  35. TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
  36. // Allocate one extra bucket, set it to look filled so the iterators stop at
  37. // end.
  38. TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
  39. }
  /// HashString - Compute a hash code for the characters in the half-open range
  /// [Start, End).  The range does not need to be null-terminated.
  ///
  static unsigned HashString(const char *Start, const char *End) {
    // Bernstein hash function: each character is folded in as Hash*33 + c.
    // TODO: investigate whether a modified bernstein hash function performs
    // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
    //   X*33+c -> X*33^c
    unsigned int Hash = 0;
    for (const char *Cur = Start; Cur != End; ++Cur)
      Hash = Hash * 33 + *Cur;

    // Final step: add in the accumulated value shifted right by five bits.
    Hash += Hash >> 5;
    return Hash;
  }
  53. /// LookupBucketFor - Look up the bucket that the specified string should end
  54. /// up in. If it already exists as a key in the map, the Item pointer for the
  55. /// specified bucket will be non-null. Otherwise, it will be null. In either
  56. /// case, the FullHashValue field of the bucket will be set to the hash value
  57. /// of the string.
  58. unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
  59. const char *NameEnd) {
  60. unsigned HTSize = NumBuckets;
  61. if (HTSize == 0) { // Hash table unallocated so far?
  62. init(16);
  63. HTSize = NumBuckets;
  64. }
  65. unsigned FullHashValue = HashString(NameStart, NameEnd);
  66. unsigned BucketNo = FullHashValue & (HTSize-1);
  67. unsigned ProbeAmt = 1;
  68. int FirstTombstone = -1;
  69. while (1) {
  70. ItemBucket &Bucket = TheTable[BucketNo];
  71. StringMapEntryBase *BucketItem = Bucket.Item;
  72. // If we found an empty bucket, this key isn't in the table yet, return it.
  73. if (BucketItem == 0) {
  74. // If we found a tombstone, we want to reuse the tombstone instead of an
  75. // empty bucket. This reduces probing.
  76. if (FirstTombstone != -1) {
  77. TheTable[FirstTombstone].FullHashValue = FullHashValue;
  78. return FirstTombstone;
  79. }
  80. Bucket.FullHashValue = FullHashValue;
  81. return BucketNo;
  82. }
  83. if (BucketItem == getTombstoneVal()) {
  84. // Skip over tombstones. However, remember the first one we see.
  85. if (FirstTombstone == -1) FirstTombstone = BucketNo;
  86. } else if (Bucket.FullHashValue == FullHashValue) {
  87. // If the full hash value matches, check deeply for a match. The common
  88. // case here is that we are only looking at the buckets (for item info
  89. // being non-null and for the full hash value) not at the items. This
  90. // is important for cache locality.
  91. // Do the comparison like this because NameStart isn't necessarily
  92. // null-terminated!
  93. char *ItemStr = (char*)BucketItem+ItemSize;
  94. unsigned ItemStrLen = BucketItem->getKeyLength();
  95. if (unsigned(NameEnd-NameStart) == ItemStrLen &&
  96. memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
  97. // We found a match!
  98. return BucketNo;
  99. }
  100. }
  101. // Okay, we didn't find the item. Probe to the next bucket.
  102. BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
  103. // Use quadratic probing, it has fewer clumping artifacts than linear
  104. // probing and has good cache behavior in the common case.
  105. ++ProbeAmt;
  106. }
  107. }
  108. /// FindKey - Look up the bucket that contains the specified key. If it exists
  109. /// in the map, return the bucket number of the key. Otherwise return -1.
  110. /// This does not modify the map.
  111. int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
  112. unsigned HTSize = NumBuckets;
  113. if (HTSize == 0) return -1; // Really empty table?
  114. unsigned FullHashValue = HashString(KeyStart, KeyEnd);
  115. unsigned BucketNo = FullHashValue & (HTSize-1);
  116. unsigned ProbeAmt = 1;
  117. while (1) {
  118. ItemBucket &Bucket = TheTable[BucketNo];
  119. StringMapEntryBase *BucketItem = Bucket.Item;
  120. // If we found an empty bucket, this key isn't in the table yet, return.
  121. if (BucketItem == 0)
  122. return -1;
  123. if (BucketItem == getTombstoneVal()) {
  124. // Ignore tombstones.
  125. } else if (Bucket.FullHashValue == FullHashValue) {
  126. // If the full hash value matches, check deeply for a match. The common
  127. // case here is that we are only looking at the buckets (for item info
  128. // being non-null and for the full hash value) not at the items. This
  129. // is important for cache locality.
  130. // Do the comparison like this because NameStart isn't necessarily
  131. // null-terminated!
  132. char *ItemStr = (char*)BucketItem+ItemSize;
  133. unsigned ItemStrLen = BucketItem->getKeyLength();
  134. if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
  135. memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
  136. // We found a match!
  137. return BucketNo;
  138. }
  139. }
  140. // Okay, we didn't find the item. Probe to the next bucket.
  141. BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
  142. // Use quadratic probing, it has fewer clumping artifacts than linear
  143. // probing and has good cache behavior in the common case.
  144. ++ProbeAmt;
  145. }
  146. }
  147. /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
  148. /// delete it. This aborts if the value isn't in the table.
  149. void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
  150. const char *VStr = (char*)V + ItemSize;
  151. StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
  152. V2 = V2;
  153. assert(V == V2 && "Didn't find key?");
  154. }
  155. /// RemoveKey - Remove the StringMapEntry for the specified key from the
  156. /// table, returning it. If the key is not in the table, this returns null.
  157. StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
  158. const char *KeyEnd) {
  159. int Bucket = FindKey(KeyStart, KeyEnd);
  160. if (Bucket == -1) return 0;
  161. StringMapEntryBase *Result = TheTable[Bucket].Item;
  162. TheTable[Bucket].Item = getTombstoneVal();
  163. --NumItems;
  164. ++NumTombstones;
  165. return Result;
  166. }
  167. /// RehashTable - Grow the table, redistributing values into the buckets with
  168. /// the appropriate mod-of-hashtable-size.
  169. void StringMapImpl::RehashTable() {
  170. unsigned NewSize = NumBuckets*2;
  171. // Allocate one extra bucket which will always be non-empty. This allows the
  172. // iterators to stop at end.
  173. ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
  174. NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
  175. // Rehash all the items into their new buckets. Luckily :) we already have
  176. // the hash values available, so we don't have to rehash any strings.
  177. for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
  178. if (IB->Item && IB->Item != getTombstoneVal()) {
  179. // Fast case, bucket available.
  180. unsigned FullHash = IB->FullHashValue;
  181. unsigned NewBucket = FullHash & (NewSize-1);
  182. if (NewTableArray[NewBucket].Item == 0) {
  183. NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
  184. NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
  185. continue;
  186. }
  187. // Otherwise probe for a spot.
  188. unsigned ProbeSize = 1;
  189. do {
  190. NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
  191. } while (NewTableArray[NewBucket].Item);
  192. // Finally found a slot. Fill it in.
  193. NewTableArray[NewBucket].Item = IB->Item;
  194. NewTableArray[NewBucket].FullHashValue = FullHash;
  195. }
  196. }
  197. free(TheTable);
  198. TheTable = NewTableArray;
  199. NumBuckets = NewSize;
  200. }