|
Sierra Toolkit
Version of the Day
|
00001 #ifndef RDESTL_HASH_MAP_H 00002 #define RDESTL_HASH_MAP_H 00003 00004 #include <stk_util/util/algorithm_rdestl.h> 00005 #include <stk_util/util/allocator_rdestl.h> 00006 #include <stk_util/util/functional_rdestl.h> 00007 #include <stk_util/util/hash_rdestl.h> 00008 #include <stk_util/util/pair_rdestl.h> 00009 00010 namespace rde 00011 { 00012 00013 // TLoadFactor4 - controls hash map load. 4 means 100% load, ie. hashmap will grow 00014 // when number of items == capacity. Default value of 6 means it grows when 00015 // number of items == capacity * 3/2 (6/4). Higher load == tighter maps, but bigger 00016 // risk of collisions. 00017 template<typename TKey, typename TValue, 00018 class THashFunc = rde::hash<TKey>, 00019 int TLoadFactor4 = 6, 00020 class TKeyEqualFunc = rde::equal_to<TKey>, 00021 class TAllocator = rde::allocator> 00022 class hash_map 00023 { 00024 public: 00025 typedef rde::pair<TKey, TValue> value_type; 00026 00027 private: 00028 struct node 00029 { 00030 static const hash_value_t kUnusedHash = 0xFFFFFFFF; 00031 static const hash_value_t kDeletedHash = 0xFFFFFFFE; 00032 00033 node(): hash(kUnusedHash) {} 00034 00035 RDE_FORCEINLINE bool is_unused() const { return hash == kUnusedHash; } 00036 RDE_FORCEINLINE bool is_deleted() const { return hash == kDeletedHash; } 00037 RDE_FORCEINLINE bool is_occupied() const { return hash < kDeletedHash; } 00038 00039 hash_value_t hash; 00040 value_type data; 00041 }; 00042 template<typename TNodePtr, typename TPtr, typename TRef> 00043 class node_iterator 00044 { 00045 friend class hash_map; 00046 public: 00047 typedef forward_iterator_tag iterator_category; 00048 00049 explicit node_iterator(TNodePtr node, const hash_map* map) 00050 : m_node(node), 00051 m_map(map) 00052 {} 00053 template<typename UNodePtr, typename UPtr, typename URef> 00054 node_iterator(const node_iterator<UNodePtr, UPtr, URef>& rhs) 00055 : m_node(rhs.node()), 00056 m_map(rhs.get_map()) 00057 {} 00058 00059 TRef operator*() const 00060 { 00061 RDE_ASSERT(m_node != 0); 00062 return m_node->data; 00063 } 00064 TPtr operator->() const 00065 { 00066 return &m_node->data; 00067 } 00068 RDE_FORCEINLINE TNodePtr node() const 00069 { 00070 return m_node; 00071 } 00072 00073 node_iterator& operator++() 00074 { 00075 RDE_ASSERT(m_node != 0); 00076 ++m_node; 00077 move_to_next_occupied_node(); 00078 return *this; 00079 } 00080 node_iterator operator++(int) 00081 { 00082 node_iterator copy(*this); 00083 ++(*this); 00084 return copy; 00085 } 00086 00087 RDE_FORCEINLINE bool operator==(const node_iterator& rhs) const 00088 { 00089 return rhs.m_node == m_node; 00090 } 00091 bool operator!=(const node_iterator& rhs) const 00092 { 00093 return !(rhs == *this); 00094 } 00095 00096 const hash_map* get_map() const { return m_map; } 00097 private: 00098 void move_to_next_occupied_node() 00099 { 00100 // @todo: save nodeEnd in constructor? 00101 TNodePtr nodeEnd = m_map->m_nodes + m_map->bucket_count(); 00102 for (; m_node < nodeEnd; ++m_node) 00103 { 00104 if (m_node->is_occupied()) 00105 break; 00106 } 00107 } 00108 TNodePtr m_node; 00109 const hash_map* m_map; 00110 }; 00111 00112 public: 00113 typedef TKey key_type; 00114 typedef TValue mapped_type; 00115 typedef TAllocator allocator_type; 00116 typedef node_iterator<node*, value_type*, value_type&> iterator; 00117 typedef node_iterator<const node*, const value_type*, const value_type&> const_iterator; 00118 typedef int size_type; 00119 static const size_type kNodeSize = sizeof(node); 00120 static const size_type kInitialCapacity = 64; 00121 00122 hash_map() 00123 : m_nodes(&ms_emptyNode), 00124 m_size(0), 00125 m_capacity(0), 00126 m_capacityMask(0), 00127 m_numUsed(0) 00128 { 00129 RDE_ASSERT((kInitialCapacity & (kInitialCapacity - 1)) == 0); // Must be power-of-two 00130 } 00131 explicit hash_map(const allocator_type& allocator) 00132 : m_nodes(&ms_emptyNode), 00133 m_size(0), 00134 m_capacity(0), 00135 m_capacityMask(0), 00136 m_numUsed(0), 00137 m_allocator(allocator) 00138 { 00139 00140 } 00141 explicit hash_map(size_type initial_bucket_count, 00142 const allocator_type& allocator = allocator_type()) 00143 : m_nodes(&ms_emptyNode), 00144 m_size(0), 00145 m_capacity(0), 00146 m_capacityMask(0), 00147 m_numUsed(0), 00148 m_allocator(allocator) 00149 { 00150 reserve(initial_bucket_count); 00151 } 00152 hash_map(size_type initial_bucket_count, 00153 const THashFunc& hashFunc, 00154 const allocator_type& allocator = allocator_type()) 00155 : m_nodes(&ms_emptyNode), 00156 m_size(0), 00157 m_capacity(0), 00158 m_capacityMask(0), 00159 m_numUsed(0), 00160 m_hashFunc(hashFunc), 00161 m_allocator(allocator) 00162 { 00163 reserve(initial_bucket_count); 00164 } 00165 hash_map(const hash_map& rhs, const allocator_type& allocator = allocator_type()) 00166 : m_nodes(&ms_emptyNode), 00167 m_size(0), 00168 m_capacity(0), 00169 m_capacityMask(0), 00170 m_numUsed(0), 00171 m_allocator(allocator) 00172 { 00173 *this = rhs; 00174 } 00175 explicit hash_map(e_noinitialize) 00176 { 00177 00178 } 00179 ~hash_map() 00180 { 00181 delete_nodes(); 00182 } 00183 00184 iterator begin() 00185 { 00186 iterator it(m_nodes, this); 00187 it.move_to_next_occupied_node(); 00188 return it; 00189 } 00190 const_iterator begin() const 00191 { 00192 const_iterator it(m_nodes, this); 00193 it.move_to_next_occupied_node(); 00194 return it; 00195 } 00196 iterator end() { return iterator(m_nodes + m_capacity, this); } 00197 const_iterator end() const { return const_iterator(m_nodes + m_capacity, this); } 00198 00199 // @note: Added for compatiblity sake. 00200 // Personally, I consider it "risky". Use find/insert for more 00201 // explicit operations. 00202 mapped_type& operator[](const key_type& key) 00203 { 00204 hash_value_t hash; 00205 node* n = find_for_insert(key, &hash); 00206 if (n == 0 || !n->is_occupied()) 00207 { 00208 return insert_at(value_type(key, TValue()), n, hash).first->second; 00209 } 00210 return n->data.second; 00211 } 00212 // @note: Doesn't copy allocator. 00213 hash_map& operator=(const hash_map& rhs) 00214 { 00215 RDE_ASSERT(invariant()); 00216 if (&rhs != this) 00217 { 00218 clear(); 00219 if (m_capacity < rhs.bucket_count()) 00220 { 00221 delete_nodes(); 00222 m_nodes = allocate_nodes(rhs.bucket_count()); 00223 m_capacity = rhs.bucket_count(); 00224 m_capacityMask = m_capacity - 1; 00225 } 00226 rehash(m_capacity, m_nodes, rhs.m_capacity, rhs.m_nodes, false); 00227 m_size = rhs.size(); 00228 m_numUsed = rhs.m_numUsed; 00229 } 00230 RDE_ASSERT(invariant()); 00231 return *this; 00232 } 00233 void swap(hash_map& rhs) 00234 { 00235 if (&rhs != this) 00236 { 00237 RDE_ASSERT(invariant()); 00238 RDE_ASSERT(m_allocator == rhs.m_allocator); 00239 rde::swap(m_nodes, rhs.m_nodes); 00240 rde::swap(m_size, rhs.m_size); 00241 rde::swap(m_capacity, rhs.m_capacity); 00242 rde::swap(m_capacityMask, rhs.m_capacityMask); 00243 rde::swap(m_numUsed, rhs.m_numUsed); 00244 rde::swap(m_hashFunc, rhs.m_hashFunc); 00245 rde::swap(m_keyEqualFunc, rhs.m_keyEqualFunc); 00246 RDE_ASSERT(invariant()); 00247 } 00248 } 00249 00250 rde::pair<iterator, bool> insert(const value_type& v) 00251 { 00252 typedef rde::pair<iterator, bool> ret_type_t; 00253 RDE_ASSERT(invariant()); 00254 if (m_numUsed * TLoadFactor4 >= m_capacity * 4) 00255 grow(); 00256 00257 hash_value_t hash = 0XFFFFFFFF; 00258 00259 node* n = find_for_insert(v.first, &hash); 00260 if (n->is_occupied()) 00261 { 00262 RDE_ASSERT(hash == n->hash && m_keyEqualFunc(v.first, n->data.first)); 00263 return ret_type_t(iterator(n, this), false); 00264 } 00265 if (n->is_unused()) 00266 ++m_numUsed; 00267 rde::copy_construct(&n->data, v); 00268 n->hash = hash; 00269 ++m_size; 00270 RDE_ASSERT(invariant()); 00271 return ret_type_t(iterator(n, this), true); 00272 } 00273 00274 size_type erase(const key_type& key) 00275 { 00276 node* n = lookup(key); 00277 if (n != (m_nodes + m_capacity) && n->is_occupied()) 00278 { 00279 erase_node(n); 00280 return 1; 00281 } 00282 return 0; 00283 } 00284 void erase(iterator it) 00285 { 00286 RDE_ASSERT(it.get_map() == this); 00287 if (it != end()) 00288 { 00289 RDE_ASSERT(!empty()); 00290 erase_node(it.node()); 00291 } 00292 } 00293 void erase(iterator from, iterator to) 00294 { 00295 for (; from != to; ++from) 00296 { 00297 node* n = from.node(); 00298 if (n->is_occupied()) 00299 erase_node(n); 00300 } 00301 } 00302 00303 iterator find(const key_type& key) 00304 { 00305 node* n = lookup(key); 00306 return iterator(n, this); 00307 } 00308 const_iterator find(const key_type& key) const 00309 { 00310 const node* n = lookup(key); 00311 return const_iterator(n, this); 00312 } 00313 00314 void clear() 00315 { 00316 node* endNode = m_nodes + m_capacity; 00317 for (node* iter = m_nodes; iter != endNode; ++iter) 00318 { 00319 if( iter ) 00320 { 00321 if (iter->is_occupied()) 00322 { 00323 rde::destruct(&iter->data); 00324 } 00325 // We can make them unused, because we clear whole hash_map, 00326 // so we can guarantee there'll be no holes. 00327 iter->hash = node::kUnusedHash; 00328 } 00329 } 00330 m_size = 0; 00331 m_numUsed = 0; 00332 } 00333 00334 // More like reserve. 00335 // resize() name chosen for compatibility sake. 00336 void reserve(size_type min_size) 00337 { 00338 size_type newCapacity = (m_capacity == 0 ? kInitialCapacity : m_capacity); 00339 while (newCapacity < min_size) 00340 newCapacity *= 2; 00341 if (newCapacity > m_capacity) 00342 grow(newCapacity); 00343 } 00344 00345 size_type bucket_count() const { return m_capacity; } 00346 size_type size() const { return m_size; } 00347 size_type empty() const { return size() == 0; } 00348 size_type nonempty_bucket_count() const { return m_numUsed; } 00349 size_type used_memory() const 00350 { 00351 return bucket_count() * kNodeSize; 00352 } 00353 00354 const allocator_type& get_allocator() const { return m_allocator; } 00355 void set_allocator(const allocator_type& allocator) 00356 { 00357 m_allocator = allocator; 00358 } 00359 00360 private: 00361 void grow() 00362 { 00363 const int newCapacity = (m_capacity == 0 ? kInitialCapacity : m_capacity * 2); 00364 grow(newCapacity); 00365 } 00366 void grow(int new_capacity) 00367 { 00368 RDE_ASSERT((new_capacity & (new_capacity - 1)) == 0); // Must be power-of-two 00369 node* newNodes = allocate_nodes(new_capacity); 00370 rehash(new_capacity, newNodes, m_capacity, m_nodes, true); 00371 if (m_nodes != &ms_emptyNode) 00372 m_allocator.deallocate(m_nodes, sizeof(node) * m_capacity); 00373 m_capacity = new_capacity; 00374 m_capacityMask = new_capacity - 1; 00375 m_nodes = newNodes; 00376 m_numUsed = m_size; 00377 RDE_ASSERT(m_numUsed < m_capacity); 00378 } 00379 rde::pair<iterator, bool> insert_at(const value_type& v, node* n, 00380 hash_value_t hash) 00381 { 00382 RDE_ASSERT(invariant()); 00383 if (n == 0 || m_numUsed * TLoadFactor4 >= m_capacity * 4) 00384 return insert(v); 00385 00386 RDE_ASSERT(!n->is_occupied()); 00387 if (n->is_unused()) 00388 ++m_numUsed; 00389 rde::copy_construct(&n->data, v); 00390 n->hash = hash; 00391 ++m_size; 00392 RDE_ASSERT(invariant()); 00393 return rde::pair<iterator, bool>(iterator(n, this), true); 00394 } 00395 node* find_for_insert(const key_type& key, hash_value_t* out_hash) 00396 { 00397 if (m_capacity == 0) 00398 return 0; 00399 00400 const hash_value_t hash = hash_func(key); 00401 *out_hash = hash; 00402 uint32 i = hash & m_capacityMask; 00403 00404 node* n = m_nodes + i; 00405 if (n->hash == hash && m_keyEqualFunc(key, n->data.first)) 00406 return n; 00407 00408 node* freeNode(0); 00409 if (n->is_deleted()) 00410 freeNode = n; 00411 uint32 numProbes(0); 00412 // Guarantees loop termination. 00413 RDE_ASSERT(m_numUsed < m_capacity); 00414 while (!n->is_unused()) 00415 { 00416 ++numProbes; 00417 i = (i + numProbes) & m_capacityMask; 00418 n = m_nodes + i; 00419 if (compare_key(n, key, hash)) 00420 return n; 00421 if (n->is_deleted() && freeNode == 0) 00422 freeNode = n; 00423 } 00424 return freeNode ? freeNode : n; 00425 } 00426 node* lookup(const key_type& key) const 00427 { 00428 const hash_value_t hash = hash_func(key); 00429 uint32 i = hash & m_capacityMask; 00430 node* n = m_nodes + i; 00431 // In theory, we could try to use compare_key here, it would 00432 // be a little bit faster for keys with cheap_compare. However, if keys are 00433 // not totally destroyed on removal (erase_node), this could result in returning 00434 // unused nodes. By testing hashes - we make sure it does not happen. 00435 // This could also be solved by doing what Google does -- set_empty_key/set_deleted_key, 00436 // but for the time being it doesn't look to me like it's worth it. 00437 if (n->hash == hash && m_keyEqualFunc(key, n->data.first)) 00438 return n; 00439 00440 uint32 numProbes(0); 00441 // Guarantees loop termination. 00442 RDE_ASSERT(m_capacity == 0 || m_numUsed < m_capacity); 00443 while (!n->is_unused()) 00444 { 00445 ++numProbes; 00446 i = (i + numProbes) & m_capacityMask; 00447 n = m_nodes + i; 00448 00449 if (compare_key(n, key, hash)) 00450 return n; 00451 } 00452 return m_nodes + m_capacity; 00453 } 00454 00455 static void rehash(int new_capacity, node* new_nodes, 00456 int capacity, const node* nodes, bool destruct_original) 00457 { 00458 //if (nodes == &ms_emptyNode || new_nodes == &ms_emptyNode) 00459 // return; 00460 00461 const node* it = nodes; 00462 const node* itEnd = nodes + capacity; 00463 const uint32 mask = new_capacity - 1; 00464 while (it != itEnd) 00465 { 00466 if (it->is_occupied()) 00467 { 00468 const hash_value_t hash = it->hash; 00469 uint32 i = hash & mask; 00470 00471 node* n = new_nodes + i; 00472 uint32 numProbes(0); 00473 while (!n->is_unused()) 00474 { 00475 ++numProbes; 00476 i = (i + numProbes) & mask; 00477 n = new_nodes + i; 00478 } 00479 rde::copy_construct(&n->data, it->data); 00480 n->hash = hash; 00481 if (destruct_original) 00482 rde::destruct(&it->data); 00483 } 00484 ++it; 00485 } 00486 } 00487 00488 node* allocate_nodes(int n) 00489 { 00490 node* buckets = static_cast<node*>(m_allocator.allocate(n * sizeof(node))); 00491 node* iterBuckets(buckets); 00492 node* end = iterBuckets + n; 00493 for (; iterBuckets != end; ++iterBuckets) 00494 iterBuckets->hash = node::kUnusedHash; 00495 00496 return buckets; 00497 } 00498 void delete_nodes() 00499 { 00500 node* it = m_nodes; 00501 node* itEnd = it + m_capacity; 00502 while (it != itEnd) 00503 { 00504 if (it && it->is_occupied()) 00505 rde::destruct(&it->data); 00506 ++it; 00507 } 00508 if (m_nodes != &ms_emptyNode) 00509 m_allocator.deallocate(m_nodes, sizeof(node) * m_capacity); 00510 00511 m_capacity = 0; 00512 m_capacityMask = 0; 00513 m_size = 0; 00514 } 00515 void erase_node(node* n) 00516 { 00517 RDE_ASSERT(!empty()); 00518 RDE_ASSERT(n->is_occupied()); 00519 rde::destruct(&n->data); 00520 n->hash = node::kDeletedHash; 00521 --m_size; 00522 } 00523 00524 RDE_FORCEINLINE hash_value_t hash_func(const key_type& key) const 00525 { 00526 const hash_value_t h = m_hashFunc(key) & 0xFFFFFFFD; 00527 //RDE_ASSERT(h < node::kDeletedHash); 00528 return h; 00529 } 00530 bool invariant() const 00531 { 00532 RDE_ASSERT((m_capacity & (m_capacity - 1)) == 0); 00533 RDE_ASSERT(m_numUsed >= m_size); 00534 return true; 00535 } 00536 00537 RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t hash, 00538 int_to_type<false>) const 00539 { 00540 return (n->hash == hash && m_keyEqualFunc(key, n->data.first)); 00541 } 00542 RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t, 00543 int_to_type<true>) const 00544 { 00545 return m_keyEqualFunc(key, n->data.first); 00546 } 00547 RDE_FORCEINLINE bool compare_key(const node* n, const key_type& key, hash_value_t hash) const 00548 { 00549 return compare_key(n, key, hash, int_to_type<has_cheap_compare<TKey>::value>()); 00550 } 00551 00552 node* m_nodes; 00553 int m_size; 00554 int m_capacity; 00555 uint32 m_capacityMask; 00556 int m_numUsed; 00557 THashFunc m_hashFunc; 00558 TKeyEqualFunc m_keyEqualFunc; 00559 TAllocator m_allocator; 00560 00561 static node ms_emptyNode; 00562 }; 00563 00564 00565 // Holy ... 00566 template<typename TKey, typename TValue, 00567 class THashFunc, 00568 int TLoadFactor4, 00569 class TKeyEqualFunc, 00570 class TAllocator> 00571 typename hash_map<TKey, TValue, THashFunc, TLoadFactor4, TKeyEqualFunc, TAllocator>::node hash_map<TKey, TValue, THashFunc, TLoadFactor4, TKeyEqualFunc, TAllocator>::ms_emptyNode; 00572 00573 } 00574 00575 #endif