// Sierra Toolkit -- "Version of the Day" source listing
00001 // Copyright (c) 2005, Google Inc. 00002 // All rights reserved. 00003 // 00004 // Redistribution and use in source and binary forms, with or without 00005 // modification, are permitted provided that the following conditions are 00006 // met: 00007 // 00008 // * Redistributions of source code must retain the above copyright 00009 // notice, this list of conditions and the following disclaimer. 00010 // * Redistributions in binary form must reproduce the above 00011 // copyright notice, this list of conditions and the following disclaimer 00012 // in the documentation and/or other materials provided with the 00013 // distribution. 00014 // * Neither the name of Google Inc. nor the names of its 00015 // contributors may be used to endorse or promote products derived from 00016 // this software without specific prior written permission. 00017 // 00018 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00019 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00020 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00021 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 00022 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00023 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00024 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00025 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00026 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00027 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 00030 // --- 00031 // Author: Craig Silverstein 00032 // 00033 // A dense hashtable is a particular implementation of 00034 // a hashtable: one that is meant to minimize memory allocation. 00035 // It does this by using an array to store all the data. 
We 00036 // steal a value from the key space to indicate "empty" array 00037 // elements (ie indices where no item lives) and another to indicate 00038 // "deleted" elements. 00039 // 00040 // (Note it is possible to change the value of the delete key 00041 // on the fly; you can even remove it, though after that point 00042 // the hashtable is insert_only until you set it again. The empty 00043 // value however can't be changed.) 00044 // 00045 // To minimize allocation and pointer overhead, we use internal 00046 // probing, in which the hashtable is a single table, and collisions 00047 // are resolved by trying to insert again in another bucket. The 00048 // most cache-efficient internal probing schemes are linear probing 00049 // (which suffers, alas, from clumping) and quadratic probing, which 00050 // is what we implement by default. 00051 // 00052 // Type requirements: value_type is required to be Copy Constructible 00053 // and Default Constructible. It is not required to be (and commonly 00054 // isn't) Assignable. 00055 // 00056 // You probably shouldn't use this code directly. Use 00057 // <google/dense_hash_map> or <google/dense_hash_set> instead. 00058 00059 // You can change the following below: 00060 // HT_OCCUPANCY_PCT -- how full before we double size 00061 // HT_EMPTY_PCT -- how empty before we halve size 00062 // HT_MIN_BUCKETS -- default smallest bucket size 00063 // 00064 // You can also change enlarge_factor (which defaults to 00065 // HT_OCCUPANCY_PCT), and shrink_factor (which defaults to 00066 // HT_EMPTY_PCT) with set_resizing_parameters(). 00067 // 00068 // How to decide what values to use? 00069 // shrink_factor's default of .4 * OCCUPANCY_PCT, is probably good. 00070 // HT_MIN_BUCKETS is probably unnecessary since you can specify 00071 // (indirectly) the starting number of buckets at construct-time. 00072 // For enlarge_factor, you can use this chart to try to trade-off 00073 // expected lookup time to the space taken up. 
By default, this 00074 // code uses quadratic probing, though you can change it to linear 00075 // via _JUMP below if you really want to. 00076 // 00077 // From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html 00078 // NUMBER OF PROBES / LOOKUP Successful Unsuccessful 00079 // Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L) 00080 // Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)2]/2 00081 // 00082 // -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99 00083 // QUADRATIC COLLISION RES. 00084 // probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11 00085 // probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6 00086 // LINEAR COLLISION RES. 00087 // probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5 00088 // probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0 00089 00090 #ifndef _DENSEHASHTABLE_H_ 00091 #define _DENSEHASHTABLE_H_ 00092 00093 // The probing method 00094 // Linear probing 00095 // #define JUMP_(key, num_probes) ( 1 ) 00096 // Quadratic probing 00097 #define JUMP_(key, num_probes) ( num_probes ) 00098 00099 00100 #include <stk_util/util/sparseconfig.h> 00101 #include <stdio.h> 00102 #include <assert.h> 00103 #include <stdlib.h> // for abort() 00104 #include <algorithm> // For swap(), eg 00105 #include <stdexcept> // For length_error 00106 #include <iostream> // For cerr 00107 #include <memory> // For uninitialized_fill, uninitialized_copy 00108 #include <utility> // for pair<> 00109 #include <iterator> // for facts about iterator tags 00110 #include <limits> // for numeric_limits<> 00111 #include <stk_util/util/libc_allocator_with_realloc.h> 00112 #include <stk_util/util/hashtable-common.h> 00113 #include <stk_util/util/type_traits_google.h> // for true_type, integral_constant, etc. 
00114 00115 _START_GOOGLE_NAMESPACE_ 00116 00117 using STL_NAMESPACE::pair; 00118 00119 // Hashtable class, used to implement the hashed associative containers 00120 // hash_set and hash_map. 00121 00122 // Value: what is stored in the table (each bucket is a Value). 00123 // Key: something in a 1-to-1 correspondence to a Value, that can be used 00124 // to search for a Value in the table (find() takes a Key). 00125 // HashFcn: Takes a Key and returns an integer, the more unique the better. 00126 // ExtractKey: given a Value, returns the unique Key associated with it. 00127 // Must inherit from unary_function, or at least have a 00128 // result_type enum indicating the return type of operator(). 00129 // SetKey: given a Value* and a Key, modifies the value such that 00130 // ExtractKey(value) == key. We guarantee this is only called 00131 // with key == deleted_key or key == empty_key. 00132 // EqualKey: Given two Keys, says whether they are the same (that is, 00133 // if they are both associated with the same Value). 00134 // Alloc: STL allocator to use to allocate memory. 
00135 00136 template <class Value, class Key, class HashFcn, 00137 class ExtractKey, class SetKey, class EqualKey, class Alloc> 00138 class dense_hashtable; 00139 00140 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> 00141 struct dense_hashtable_iterator; 00142 00143 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> 00144 struct dense_hashtable_const_iterator; 00145 00146 // We're just an array, but we need to skip over empty and deleted elements 00147 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> 00148 struct dense_hashtable_iterator { 00149 private: 00150 typedef typename A::template rebind<V>::other value_alloc_type; 00151 00152 public: 00153 typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A> iterator; 00154 typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator; 00155 00156 typedef STL_NAMESPACE::forward_iterator_tag iterator_category; 00157 typedef V value_type; 00158 typedef typename value_alloc_type::difference_type difference_type; 00159 typedef typename value_alloc_type::size_type size_type; 00160 typedef typename value_alloc_type::reference reference; 00161 typedef typename value_alloc_type::pointer pointer; 00162 00163 // "Real" constructor and default constructor 00164 dense_hashtable_iterator(const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *h, 00165 pointer it, pointer it_end, bool advance) 00166 : ht(h), pos(it), end(it_end) { 00167 if (advance) advance_past_empty_and_deleted(); 00168 } 00169 dense_hashtable_iterator() { } 00170 // The default destructor is fine; we don't define one 00171 // The default operator= is fine; we don't define one 00172 00173 // Happy dereferencer 00174 reference operator*() const { return *pos; } 00175 pointer operator->() const { return &(operator*()); } 00176 00177 // Arithmetic. 
The only hard part is making sure that 00178 // we're not on an empty or marked-deleted array element 00179 void advance_past_empty_and_deleted() { 00180 while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) ) 00181 ++pos; 00182 } 00183 iterator& operator++() { 00184 assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this; 00185 } 00186 iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } 00187 00188 // Comparison. 00189 bool operator==(const iterator& it) const { return pos == it.pos; } 00190 bool operator!=(const iterator& it) const { return pos != it.pos; } 00191 00192 00193 // The actual data 00194 const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht; 00195 pointer pos, end; 00196 }; 00197 00198 00199 // Now do it all again, but with const-ness! 00200 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> 00201 struct dense_hashtable_const_iterator { 00202 private: 00203 typedef typename A::template rebind<V>::other value_alloc_type; 00204 00205 public: 00206 typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A> iterator; 00207 typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator; 00208 00209 typedef STL_NAMESPACE::forward_iterator_tag iterator_category; 00210 typedef V value_type; 00211 typedef typename value_alloc_type::difference_type difference_type; 00212 typedef typename value_alloc_type::size_type size_type; 00213 typedef typename value_alloc_type::const_reference reference; 00214 typedef typename value_alloc_type::const_pointer pointer; 00215 00216 // "Real" constructor and default constructor 00217 dense_hashtable_const_iterator( 00218 const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *h, 00219 pointer it, pointer it_end, bool advance) 00220 : ht(h), pos(it), end(it_end) { 00221 if (advance) advance_past_empty_and_deleted(); 00222 } 00223 dense_hashtable_const_iterator() 00224 : ht(NULL), pos(pointer()), end(pointer()) { } 00225 // This lets us convert 
regular iterators to const iterators 00226 dense_hashtable_const_iterator(const iterator &it) 00227 : ht(it.ht), pos(it.pos), end(it.end) { } 00228 // The default destructor is fine; we don't define one 00229 // The default operator= is fine; we don't define one 00230 00231 // Happy dereferencer 00232 reference operator*() const { return *pos; } 00233 pointer operator->() const { return &(operator*()); } 00234 00235 // Arithmetic. The only hard part is making sure that 00236 // we're not on an empty or marked-deleted array element 00237 void advance_past_empty_and_deleted() { 00238 while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) ) 00239 ++pos; 00240 } 00241 const_iterator& operator++() { 00242 assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this; 00243 } 00244 const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; } 00245 00246 // Comparison. 00247 bool operator==(const const_iterator& it) const { return pos == it.pos; } 00248 bool operator!=(const const_iterator& it) const { return pos != it.pos; } 00249 00250 00251 // The actual data 00252 const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht; 00253 pointer pos, end; 00254 }; 00255 00256 template <class Value, class Key, class HashFcn, 00257 class ExtractKey, class SetKey, class EqualKey, class Alloc> 00258 class dense_hashtable { 00259 private: 00260 typedef typename Alloc::template rebind<Value>::other value_alloc_type; 00261 00262 public: 00263 typedef Key key_type; 00264 typedef Value value_type; 00265 typedef HashFcn hasher; 00266 typedef EqualKey key_equal; 00267 typedef Alloc allocator_type; 00268 00269 typedef typename value_alloc_type::size_type size_type; 00270 typedef typename value_alloc_type::difference_type difference_type; 00271 typedef typename value_alloc_type::reference reference; 00272 typedef typename value_alloc_type::const_reference const_reference; 00273 typedef typename value_alloc_type::pointer pointer; 00274 typedef 
typename value_alloc_type::const_pointer const_pointer; 00275 typedef dense_hashtable_iterator<Value, Key, HashFcn, 00276 ExtractKey, SetKey, EqualKey, Alloc> 00277 iterator; 00278 00279 typedef dense_hashtable_const_iterator<Value, Key, HashFcn, 00280 ExtractKey, SetKey, EqualKey, Alloc> 00281 const_iterator; 00282 00283 // These come from tr1. For us they're the same as regular iterators. 00284 typedef iterator local_iterator; 00285 typedef const_iterator const_local_iterator; 00286 00287 // How full we let the table get before we resize, by default. 00288 // Knuth says .8 is good -- higher causes us to probe too much, 00289 // though it saves memory. 00290 static const int HT_OCCUPANCY_PCT; // = 50 (out of 100) 00291 00292 // How empty we let the table get before we resize lower, by default. 00293 // (0.0 means never resize lower.) 00294 // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing 00295 static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT; 00296 00297 // Minimum size we're willing to let hashtables be. 00298 // Must be a power of two, and at least 4. 00299 // Note, however, that for a given hashtable, the initial size is a 00300 // function of the first constructor arg, and may be >HT_MIN_BUCKETS. 00301 static const size_type HT_MIN_BUCKETS = 4; 00302 00303 // By default, if you don't specify a hashtable size at 00304 // construction-time, we use this size. Must be a power of two, and 00305 // at least HT_MIN_BUCKETS. 
00306 static const size_type HT_DEFAULT_STARTING_BUCKETS = 32; 00307 00308 // ITERATOR FUNCTIONS 00309 iterator begin() { return iterator(this, table, 00310 table + num_buckets, true); } 00311 iterator end() { return iterator(this, table + num_buckets, 00312 table + num_buckets, true); } 00313 const_iterator begin() const { return const_iterator(this, table, 00314 table+num_buckets,true);} 00315 const_iterator end() const { return const_iterator(this, table + num_buckets, 00316 table+num_buckets,true);} 00317 00318 // These come from tr1 unordered_map. They iterate over 'bucket' n. 00319 // We'll just consider bucket n to be the n-th element of the table. 00320 local_iterator begin(size_type i) { 00321 return local_iterator(this, table + i, table + i+1, false); 00322 } 00323 local_iterator end(size_type i) { 00324 local_iterator it = begin(i); 00325 if (!test_empty(i) && !test_deleted(i)) 00326 ++it; 00327 return it; 00328 } 00329 const_local_iterator begin(size_type i) const { 00330 return const_local_iterator(this, table + i, table + i+1, false); 00331 } 00332 const_local_iterator end(size_type i) const { 00333 const_local_iterator it = begin(i); 00334 if (!test_empty(i) && !test_deleted(i)) 00335 ++it; 00336 return it; 00337 } 00338 00339 // ACCESSOR FUNCTIONS for the things we templatize on, basically 00340 hasher hash_funct() const { return settings; } 00341 key_equal key_eq() const { return key_info; } 00342 allocator_type get_allocator() const { 00343 return allocator_type(val_info); 00344 } 00345 00346 // Accessor function for statistics gathering. 00347 int num_table_copies() const { return settings.num_ht_copies(); } 00348 00349 private: 00350 // Annoyingly, we can't copy values around, because they might have 00351 // const components (they're probably pair<const X, Y>). We use 00352 // explicit destructor invocation and placement new to get around 00353 // this. Arg. 
00354 void set_value(pointer dst, const_reference src) { 00355 dst->~value_type(); // delete the old value, if any 00356 new(dst) value_type(src); 00357 } 00358 00359 void destroy_buckets(size_type first, size_type last) { 00360 for ( ; first != last; ++first) 00361 table[first].~value_type(); 00362 } 00363 00364 // DELETE HELPER FUNCTIONS 00365 // This lets the user describe a key that will indicate deleted 00366 // table entries. This key should be an "impossible" entry -- 00367 // if you try to insert it for real, you won't be able to retrieve it! 00368 // (NB: while you pass in an entire value, only the key part is looked 00369 // at. This is just because I don't know how to assign just a key.) 00370 private: 00371 void squash_deleted() { // gets rid of any deleted entries we have 00372 if ( num_deleted ) { // get rid of deleted before writing 00373 dense_hashtable tmp(*this); // copying will get rid of deleted 00374 swap(tmp); // now we are tmp 00375 } 00376 assert(num_deleted == 0); 00377 } 00378 00379 bool test_deleted_key(const key_type& key) const { 00380 // The num_deleted test is crucial for read(): after read(), the ht values 00381 // are garbage, and we don't want to think some of them are deleted. 00382 // Invariant: !use_deleted implies num_deleted is 0. 
00383 assert(settings.use_deleted() || num_deleted == 0); 00384 return num_deleted > 0 && equals(key_info.delkey, key); 00385 } 00386 00387 public: 00388 void set_deleted_key(const key_type &key) { 00389 // the empty indicator (if specified) and the deleted indicator 00390 // must be different 00391 assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval))) 00392 && "Passed the empty-key to set_deleted_key"); 00393 // It's only safe to change what "deleted" means if we purge deleted guys 00394 squash_deleted(); 00395 settings.set_use_deleted(true); 00396 key_info.delkey = key; 00397 } 00398 void clear_deleted_key() { 00399 squash_deleted(); 00400 settings.set_use_deleted(false); 00401 } 00402 key_type deleted_key() const { 00403 assert(settings.use_deleted() 00404 && "Must set deleted key before calling deleted_key"); 00405 return key_info.delkey; 00406 } 00407 00408 // These are public so the iterators can use them 00409 // True if the item at position bucknum is "deleted" marker 00410 bool test_deleted(size_type bucknum) const { 00411 return test_deleted_key(get_key(table[bucknum])); 00412 } 00413 bool test_deleted(const iterator &it) const { 00414 return test_deleted_key(get_key(*it)); 00415 } 00416 bool test_deleted(const const_iterator &it) const { 00417 return test_deleted_key(get_key(*it)); 00418 } 00419 00420 private: 00421 // Set it so test_deleted is true. true if object didn't used to be deleted. 00422 bool set_deleted(iterator &it) { 00423 assert(settings.use_deleted()); 00424 bool retval = !test_deleted(it); 00425 // &* converts from iterator to value-type. 00426 set_key(&(*it), key_info.delkey); 00427 return retval; 00428 } 00429 // Set it so test_deleted is false. true if object used to be deleted. 00430 bool clear_deleted(iterator &it) { 00431 assert(settings.use_deleted()); 00432 // Happens automatically when we assign something else in its place. 
00433 return test_deleted(it); 00434 } 00435 00436 // We also allow to set/clear the deleted bit on a const iterator. 00437 // We allow a const_iterator for the same reason you can delete a 00438 // const pointer: it's convenient, and semantically you can't use 00439 // 'it' after it's been deleted anyway, so its const-ness doesn't 00440 // really matter. 00441 bool set_deleted(const_iterator &it) { 00442 assert(settings.use_deleted()); 00443 bool retval = !test_deleted(it); 00444 set_key(const_cast<pointer>(&(*it)), key_info.delkey); 00445 return retval; 00446 } 00447 // Set it so test_deleted is false. true if object used to be deleted. 00448 bool clear_deleted(const_iterator &it) { 00449 assert(settings.use_deleted()); 00450 return test_deleted(it); 00451 } 00452 00453 // EMPTY HELPER FUNCTIONS 00454 // This lets the user describe a key that will indicate empty (unused) 00455 // table entries. This key should be an "impossible" entry -- 00456 // if you try to insert it for real, you won't be able to retrieve it! 00457 // (NB: while you pass in an entire value, only the key part is looked 00458 // at. This is just because I don't know how to assign just a key.) 00459 public: 00460 // These are public so the iterators can use them 00461 // True if the item at position bucknum is "empty" marker 00462 bool test_empty(size_type bucknum) const { 00463 assert(settings.use_empty()); // we always need to know what's empty! 00464 return equals(get_key(val_info.emptyval), get_key(table[bucknum])); 00465 } 00466 bool test_empty(const iterator &it) const { 00467 assert(settings.use_empty()); // we always need to know what's empty! 00468 return equals(get_key(val_info.emptyval), get_key(*it)); 00469 } 00470 bool test_empty(const const_iterator &it) const { 00471 assert(settings.use_empty()); // we always need to know what's empty! 
00472 return equals(get_key(val_info.emptyval), get_key(*it)); 00473 } 00474 00475 private: 00476 void fill_range_with_empty(pointer table_start, pointer table_end) { 00477 STL_NAMESPACE::uninitialized_fill(table_start, table_end, val_info.emptyval); 00478 } 00479 00480 public: 00481 // TODO(csilvers): change all callers of this to pass in a key instead, 00482 // and take a const key_type instead of const value_type. 00483 void set_empty_key(const_reference val) { 00484 // Once you set the empty key, you can't change it 00485 assert(!settings.use_empty() && "Calling set_empty_key multiple times"); 00486 // The deleted indicator (if specified) and the empty indicator 00487 // must be different. 00488 assert((!settings.use_deleted() || !equals(get_key(val), key_info.delkey)) 00489 && "Setting the empty key the same as the deleted key"); 00490 settings.set_use_empty(true); 00491 set_value(&val_info.emptyval, val); 00492 00493 assert(!table); // must set before first use 00494 // num_buckets was set in constructor even though table was NULL 00495 table = val_info.allocate(num_buckets); 00496 assert(table); 00497 fill_range_with_empty(table, table + num_buckets); 00498 } 00499 // TODO(sjackman): return a key_type rather than a value_type 00500 value_type empty_key() const { 00501 assert(settings.use_empty()); 00502 return val_info.emptyval; 00503 } 00504 00505 // FUNCTIONS CONCERNING SIZE 00506 public: 00507 size_type size() const { return num_elements - num_deleted; } 00508 size_type max_size() const { return val_info.max_size(); } 00509 bool empty() const { return size() == 0; } 00510 size_type bucket_count() const { return num_buckets; } 00511 size_type max_bucket_count() const { return max_size(); } 00512 size_type nonempty_bucket_count() const { return num_elements; } 00513 // These are tr1 methods. Their idea of 'bucket' doesn't map well to 00514 // what we do. We just say every bucket has 0 or 1 items in it. 
00515 size_type bucket_size(size_type i) const { 00516 return begin(i) == end(i) ? 0 : 1; 00517 } 00518 00519 private: 00520 // Because of the above, size_type(-1) is never legal; use it for errors 00521 static const size_type ILLEGAL_BUCKET = size_type(-1); 00522 00523 // Used after a string of deletes. Returns true if we actually shrunk. 00524 // TODO(csilvers): take a delta so we can take into account inserts 00525 // done after shrinking. Maybe make part of the Settings class? 00526 bool maybe_shrink() { 00527 assert(num_elements >= num_deleted); 00528 assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two 00529 assert(bucket_count() >= HT_MIN_BUCKETS); 00530 bool retval = false; 00531 00532 // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, 00533 // we'll never shrink until you get relatively big, and we'll never 00534 // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something 00535 // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will 00536 // shrink us down to HT_MIN_BUCKETS buckets, which is too small. 00537 const size_type num_remain = num_elements - num_deleted; 00538 const size_type shrink_threshold = settings.shrink_threshold(); 00539 if (shrink_threshold > 0 && num_remain < shrink_threshold && 00540 bucket_count() > HT_DEFAULT_STARTING_BUCKETS) { 00541 const float shrink_factor = settings.shrink_factor(); 00542 size_type sz = bucket_count() / 2; // find how much we should shrink 00543 while (sz > HT_DEFAULT_STARTING_BUCKETS && 00544 num_remain < sz * shrink_factor) { 00545 sz /= 2; // stay a power of 2 00546 } 00547 dense_hashtable tmp(*this, sz); // Do the actual resizing 00548 swap(tmp); // now we are tmp 00549 retval = true; 00550 } 00551 settings.set_consider_shrink(false); // because we just considered it 00552 return retval; 00553 } 00554 00555 // We'll let you resize a hashtable -- though this makes us copy all! 
00556 // When you resize, you say, "make it big enough for this many more elements" 00557 // Returns true if we actually resized, false if size was already ok. 00558 bool resize_delta(size_type delta) { 00559 bool did_resize = false; 00560 if ( settings.consider_shrink() ) { // see if lots of deletes happened 00561 if ( maybe_shrink() ) 00562 did_resize = true; 00563 } 00564 if (num_elements >= (STL_NAMESPACE::numeric_limits<size_type>::max)() - delta) 00565 throw std::length_error("resize overflow"); 00566 if ( bucket_count() >= HT_MIN_BUCKETS && 00567 (num_elements + delta) <= settings.enlarge_threshold() ) 00568 return did_resize; // we're ok as we are 00569 00570 // Sometimes, we need to resize just to get rid of all the 00571 // "deleted" buckets that are clogging up the hashtable. So when 00572 // deciding whether to resize, count the deleted buckets (which 00573 // are currently taking up room). But later, when we decide what 00574 // size to resize to, *don't* count deleted buckets, since they 00575 // get discarded during the resize. 00576 const size_type needed_size = settings.min_buckets(num_elements + delta, 0); 00577 if ( needed_size <= bucket_count() ) // we have enough buckets 00578 return did_resize; 00579 00580 size_type resize_to = 00581 settings.min_buckets(num_elements - num_deleted + delta, bucket_count()); 00582 00583 if (resize_to < needed_size && // may double resize_to 00584 resize_to < (STL_NAMESPACE::numeric_limits<size_type>::max)() / 2) { 00585 // This situation means that we have enough deleted elements, 00586 // that once we purge them, we won't actually have needed to 00587 // grow. But we may want to grow anyway: if we just purge one 00588 // element, say, we'll have to grow anyway next time we 00589 // insert. Might as well grow now, since we're already going 00590 // through the trouble of copying (in order to purge the 00591 // deleted elements). 
00592 const size_type target = 00593 static_cast<size_type>(settings.shrink_size(resize_to*2)); 00594 if (num_elements - num_deleted + delta >= target) { 00595 // Good, we won't be below the shrink threshhold even if we double. 00596 resize_to *= 2; 00597 } 00598 } 00599 dense_hashtable tmp(*this, resize_to); 00600 swap(tmp); // now we are tmp 00601 return true; 00602 } 00603 00604 // We require table be not-NULL and empty before calling this. 00605 void resize_table(size_type /*old_size*/, size_type new_size, 00606 true_type) { 00607 table = val_info.realloc_or_die(table, new_size); 00608 } 00609 00610 void resize_table(size_type old_size, size_type new_size, false_type) { 00611 val_info.deallocate(table, old_size); 00612 table = val_info.allocate(new_size); 00613 } 00614 00615 // Used to actually do the rehashing when we grow/shrink a hashtable 00616 void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted) { 00617 clear_to_size(settings.min_buckets(ht.size(), min_buckets_wanted)); 00618 00619 // We use a normal iterator to get non-deleted bcks from ht 00620 // We could use insert() here, but since we know there are 00621 // no duplicates and no deleted items, we can be more efficient 00622 assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two 00623 for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) { 00624 size_type num_probes = 0; // how many times we've probed 00625 size_type bucknum; 00626 const size_type bucket_count_minus_one = bucket_count() - 1; 00627 for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; 00628 !test_empty(bucknum); // not empty 00629 bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) { 00630 ++num_probes; 00631 assert(num_probes < bucket_count() 00632 && "Hashtable is full: an error in key_equal<> or hash<>"); 00633 } 00634 set_value(&table[bucknum], *it); // copies the value to here 00635 num_elements++; 00636 } 00637 settings.inc_num_ht_copies(); 00638 } 00639 00640 // 
Required by the spec for hashed associative container 00641 public: 00642 // Though the docs say this should be num_buckets, I think it's much 00643 // more useful as num_elements. As a special feature, calling with 00644 // req_elements==0 will cause us to shrink if we can, saving space. 00645 void resize(size_type req_elements) { // resize to this or larger 00646 if ( settings.consider_shrink() || req_elements == 0 ) 00647 maybe_shrink(); 00648 if ( req_elements > num_elements ) 00649 resize_delta(req_elements - num_elements); 00650 } 00651 00652 // Get and change the value of shrink_factor and enlarge_factor. The 00653 // description at the beginning of this file explains how to choose 00654 // the values. Setting the shrink parameter to 0.0 ensures that the 00655 // table never shrinks. 00656 void get_resizing_parameters(float* shrink, float* grow) const { 00657 *shrink = settings.shrink_factor(); 00658 *grow = settings.enlarge_factor(); 00659 } 00660 void set_resizing_parameters(float shrink, float grow) { 00661 settings.set_resizing_parameters(shrink, grow); 00662 settings.reset_thresholds(bucket_count()); 00663 } 00664 00665 // CONSTRUCTORS -- as required by the specs, we take a size, 00666 // but also let you specify a hashfunction, key comparator, 00667 // and key extractor. We also define a copy constructor and =. 00668 // DESTRUCTOR -- needs to free the table 00669 explicit dense_hashtable(size_type expected_max_items_in_table = 0, 00670 const HashFcn& hf = HashFcn(), 00671 const EqualKey& eql = EqualKey(), 00672 const ExtractKey& ext = ExtractKey(), 00673 const SetKey& set = SetKey(), 00674 const Alloc& alloc = Alloc()) 00675 : settings(hf), 00676 key_info(ext, set, eql), 00677 num_deleted(0), 00678 num_elements(0), 00679 num_buckets(expected_max_items_in_table == 0 00680 ? 
HT_DEFAULT_STARTING_BUCKETS 00681 : settings.min_buckets(expected_max_items_in_table, 0)), 00682 val_info(alloc_impl<value_alloc_type>(alloc)), 00683 table(NULL) { 00684 // table is NULL until emptyval is set. However, we set num_buckets 00685 // here so we know how much space to allocate once emptyval is set 00686 settings.reset_thresholds(bucket_count()); 00687 } 00688 00689 // As a convenience for resize(), we allow an optional second argument 00690 // which lets you make this new hashtable a different size than ht 00691 dense_hashtable(const dense_hashtable& ht, 00692 size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) 00693 : settings(ht.settings), 00694 key_info(ht.key_info), 00695 num_deleted(0), 00696 num_elements(0), 00697 num_buckets(0), 00698 val_info(ht.val_info), 00699 table(NULL) { 00700 if (!ht.settings.use_empty()) { 00701 // If use_empty isn't set, copy_from will crash, so we do our own copying. 00702 assert(ht.empty()); 00703 num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted); 00704 settings.reset_thresholds(bucket_count()); 00705 return; 00706 } 00707 settings.reset_thresholds(bucket_count()); 00708 copy_from(ht, min_buckets_wanted); // copy_from() ignores deleted entries 00709 } 00710 00711 dense_hashtable& operator= (const dense_hashtable& ht) { 00712 if (&ht == this) return *this; // don't copy onto ourselves 00713 if (!ht.settings.use_empty()) { 00714 assert(ht.empty()); 00715 dense_hashtable empty_table(ht); // empty table with ht's thresholds 00716 this->swap(empty_table); 00717 return *this; 00718 } 00719 settings = ht.settings; 00720 key_info = ht.key_info; 00721 set_value(&val_info.emptyval, ht.val_info.emptyval); 00722 // copy_from() calls clear and sets num_deleted to 0 too 00723 copy_from(ht, HT_MIN_BUCKETS); 00724 // we purposefully don't copy the allocator, which may not be copyable 00725 return *this; 00726 } 00727 00728 ~dense_hashtable() { 00729 if (table) { 00730 destroy_buckets(0, num_buckets); 00731 
val_info.deallocate(table, num_buckets); 00732 } 00733 } 00734 00735 // Many STL algorithms use swap instead of copy constructors 00736 void swap(dense_hashtable& ht) { 00737 STL_NAMESPACE::swap(settings, ht.settings); 00738 STL_NAMESPACE::swap(key_info, ht.key_info); 00739 STL_NAMESPACE::swap(num_deleted, ht.num_deleted); 00740 STL_NAMESPACE::swap(num_elements, ht.num_elements); 00741 STL_NAMESPACE::swap(num_buckets, ht.num_buckets); 00742 { value_type tmp; // for annoying reasons, swap() doesn't work 00743 set_value(&tmp, val_info.emptyval); 00744 set_value(&val_info.emptyval, ht.val_info.emptyval); 00745 set_value(&ht.val_info.emptyval, tmp); 00746 } 00747 STL_NAMESPACE::swap(table, ht.table); 00748 settings.reset_thresholds(bucket_count()); // this also resets consider_shrink 00749 ht.settings.reset_thresholds(bucket_count()); 00750 // we purposefully don't swap the allocator, which may not be swap-able 00751 } 00752 00753 private: 00754 void clear_to_size(size_type new_num_buckets) { 00755 if (!table) { 00756 table = val_info.allocate(new_num_buckets); 00757 } else { 00758 destroy_buckets(0, num_buckets); 00759 if (new_num_buckets != num_buckets) { // resize, if necessary 00760 typedef integral_constant<bool, 00761 is_same<value_alloc_type, 00762 libc_allocator_with_realloc<value_type> >::value> 00763 realloc_ok; 00764 resize_table(num_buckets, new_num_buckets, realloc_ok()); 00765 } 00766 } 00767 assert(table); 00768 fill_range_with_empty(table, table + new_num_buckets); 00769 num_elements = 0; 00770 num_deleted = 0; 00771 num_buckets = new_num_buckets; // our new size 00772 settings.reset_thresholds(bucket_count()); 00773 } 00774 00775 public: 00776 // It's always nice to be able to clear a table without deallocating it 00777 void clear() { 00778 // If the table is already empty, and the number of buckets is 00779 // already as we desire, there's nothing to do. 
00780 const size_type new_num_buckets = settings.min_buckets(0, 0); 00781 if (num_elements == 0 && new_num_buckets == num_buckets) { 00782 return; 00783 } 00784 clear_to_size(new_num_buckets); 00785 } 00786 00787 // Clear the table without resizing it. 00788 // Mimicks the stl_hashtable's behaviour when clear()-ing in that it 00789 // does not modify the bucket count 00790 void clear_no_resize() { 00791 if (num_elements > 0) { 00792 assert(table); 00793 destroy_buckets(0, num_buckets); 00794 fill_range_with_empty(table, table + num_buckets); 00795 } 00796 // don't consider to shrink before another erase() 00797 settings.reset_thresholds(bucket_count()); 00798 num_elements = 0; 00799 num_deleted = 0; 00800 } 00801 00802 // LOOKUP ROUTINES 00803 private: 00804 // Returns a pair of positions: 1st where the object is, 2nd where 00805 // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET 00806 // if object is not found; 2nd is ILLEGAL_BUCKET if it is. 00807 // Note: because of deletions where-to-insert is not trivial: it's the 00808 // first deleted bucket we see, as long as we don't find the key later 00809 pair<size_type, size_type> find_position(const key_type &key) const { 00810 size_type num_probes = 0; // how many times we've probed 00811 const size_type bucket_count_minus_one = bucket_count() - 1; 00812 size_type bucknum = hash(key) & bucket_count_minus_one; 00813 size_type insert_pos = ILLEGAL_BUCKET; // where we would insert 00814 while ( 1 ) { // probe until something happens 00815 if ( test_empty(bucknum) ) { // bucket is empty 00816 if ( insert_pos == ILLEGAL_BUCKET ) // found no prior place to insert 00817 return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum); 00818 else 00819 return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos); 00820 00821 } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert 00822 if ( insert_pos == ILLEGAL_BUCKET ) 00823 insert_pos = bucknum; 00824 00825 } else if ( equals(key, 
get_key(table[bucknum])) ) { 00826 return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET); 00827 } 00828 ++num_probes; // we're doing another probe 00829 bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; 00830 assert(num_probes < bucket_count() 00831 && "Hashtable is full: an error in key_equal<> or hash<>"); 00832 } 00833 } 00834 00835 public: 00836 iterator find(const key_type& key) { 00837 if ( size() == 0 ) return end(); 00838 pair<size_type, size_type> pos = find_position(key); 00839 if ( pos.first == ILLEGAL_BUCKET ) // alas, not there 00840 return end(); 00841 else 00842 return iterator(this, table + pos.first, table + num_buckets, false); 00843 } 00844 00845 const_iterator find(const key_type& key) const { 00846 if ( size() == 0 ) return end(); 00847 pair<size_type, size_type> pos = find_position(key); 00848 if ( pos.first == ILLEGAL_BUCKET ) // alas, not there 00849 return end(); 00850 else 00851 return const_iterator(this, table + pos.first, table+num_buckets, false); 00852 } 00853 00854 // This is a tr1 method: the bucket a given key is in, or what bucket 00855 // it would be put in, if it were to be inserted. Shrug. 00856 size_type bucket(const key_type& key) const { 00857 pair<size_type, size_type> pos = find_position(key); 00858 return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first; 00859 } 00860 00861 // Counts how many elements have key key. For maps, it's either 0 or 1. 00862 size_type count(const key_type &key) const { 00863 pair<size_type, size_type> pos = find_position(key); 00864 return pos.first == ILLEGAL_BUCKET ? 0 : 1; 00865 } 00866 00867 // Likewise, equal_range doesn't really make sense for us. Oh well. 
  // Returns the (at most one) element matching key as an STL half-open
  // range; keys are unique here, so the range has length 0 or 1.
  pair<iterator,iterator> equal_range(const key_type& key) {
    iterator pos = find(key);      // either an iterator or end
    if (pos == end()) {
      return pair<iterator,iterator>(pos, pos);
    } else {
      const iterator startpos = pos++;   // pos now points one past the match
      return pair<iterator,iterator>(startpos, pos);
    }
  }
  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
    const_iterator pos = find(key);      // either an iterator or end
    if (pos == end()) {
      return pair<const_iterator,const_iterator>(pos, pos);
    } else {
      const const_iterator startpos = pos++;
      return pair<const_iterator,const_iterator>(startpos, pos);
    }
  }


  // INSERTION ROUTINES
 private:
  // Private method used by insert_noresize and find_or_insert.
  // Unconditionally stores obj in bucket pos (which the caller found via
  // find_position); reuses a deleted slot when there is one.
  iterator insert_at(const_reference obj, size_type pos) {
    if (size() >= max_size())
      throw std::length_error("insert overflow");
    if ( test_deleted(pos) ) {      // just replace if it's been del.
      // shrug: shouldn't need to be const.
      const_iterator delpos(this, table + pos, table + num_buckets, false);
      clear_deleted(delpos);
      assert( num_deleted > 0);
      --num_deleted;                // used to be, now it isn't
    } else {
      ++num_elements;               // replacing an empty bucket
    }
    set_value(&table[pos], obj);
    return iterator(this, table + pos, table + num_buckets, false);
  }

  // If you know *this is big enough to hold obj, use this routine.
  // Returns the position and whether an insert actually happened
  // (false means the key was already present).
  pair<iterator, bool> insert_noresize(const_reference obj) {
    // First, double-check we're not inserting delkey or emptyval
    assert((!settings.use_empty() || !equals(get_key(obj),
                                             get_key(val_info.emptyval)))
           && "Inserting the empty key");
    assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
      return pair<iterator,bool>(iterator(this, table + pos.first,
                                          table + num_buckets, false),
                                 false);     // false: we didn't insert
    } else {                                 // pos.second says where to put it
      return pair<iterator,bool>(insert_at(obj, pos.second), true);
    }
  }

  // Specializations of insert(it, it) depending on the power of the iterator:
  // (1) Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l,
              STL_NAMESPACE::forward_iterator_tag) {
    size_t dist = STL_NAMESPACE::distance(f, l);
    if (dist >= (std::numeric_limits<size_type>::max)())
      throw std::length_error("insert-range overflow");
    // Grow once up front so the loop never rehashes.
    resize_delta(static_cast<size_type>(dist));
    for ( ; dist > 0; --dist, ++f) {
      insert_noresize(*f);
    }
  }

  // (2) Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l,
              STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f)
      insert(*f);    // single-element insert resizes as needed
  }

 public:
  // This is the normal insert routine, used by the outside world.
  pair<iterator, bool> insert(const_reference obj) {
    resize_delta(1);                // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator.
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type
    insert(f, l,
           typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }

  // DefaultValue is a functor that takes a key and returns a value_type
  // representing the default value to be inserted if none is found.
  // Returns a reference to the (existing or newly inserted) element.
  template <class DefaultValue>
  value_type& find_or_insert(const key_type& key) {
    // First, double-check we're not inserting emptykey or delkey
    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
           && "Inserting the empty key");
    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(key);
    DefaultValue default_value;
    if ( pos.first != ILLEGAL_BUCKET) {  // object was already there
      return table[pos.first];
    } else if (resize_delta(1)) {        // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to insert.
      return *insert_noresize(default_value(key)).first;
    } else {                             // no need to rehash, insert right here
      return *insert_at(default_value(key), pos.second);
    }
  }

  // DELETION ROUTINES
  // Returns the number of elements erased (0 or 1, since keys are unique).
  size_type erase(const key_type& key) {
    // First, double-check we're not trying to erase delkey or emptyval.
    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
           && "Erasing the empty key");
    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
           && "Erasing the deleted key");
    const_iterator pos = find(key);   // shrug: shouldn't need to be const
    if ( pos != end() ) {
      assert(!test_deleted(pos));  // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      settings.set_consider_shrink(true); // will think about shrink after next insert
      return 1;                    // because we deleted one thing
    } else {
      return 0;                    // because we deleted nothing
    }
  }

  // Erase the element at pos.  (Note: unlike tr1, this returns void,
  // not the iterator past the deleted item.)
  void erase(iterator pos) {
    if ( pos == end() ) return;    // sanity check
    if ( set_deleted(pos) ) {      // true if object has been newly deleted
      ++num_deleted;
      settings.set_consider_shrink(true); // will think about shrink after next insert
    }
  }

  void erase(iterator f, iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )        // should always be true
        ++num_deleted;
    }
    settings.set_consider_shrink(true); // will think about shrink after next insert
  }

  // We allow you to erase a const_iterator just like we allow you to
  // erase an iterator.  This is in parallel to 'delete': you can delete
  // a const pointer just like a non-const pointer.  The logic is that
  // you can't use the object after it's erased anyway, so it doesn't matter
  // if it's const or not.
01021 void erase(const_iterator pos) { 01022 if ( pos == end() ) return; // sanity check 01023 if ( set_deleted(pos) ) { // true if object has been newly deleted 01024 ++num_deleted; 01025 settings.set_consider_shrink(true); // will think about shrink after next insert 01026 } 01027 } 01028 void erase(const_iterator f, const_iterator l) { 01029 for ( ; f != l; ++f) { 01030 if ( set_deleted(f) ) // should always be true 01031 ++num_deleted; 01032 } 01033 settings.set_consider_shrink(true); // will think about shrink after next insert 01034 } 01035 01036 01037 // COMPARISON 01038 bool operator==(const dense_hashtable& ht) const { 01039 if (size() != ht.size()) { 01040 return false; 01041 } else if (this == &ht) { 01042 return true; 01043 } else { 01044 // Iterate through the elements in "this" and see if the 01045 // corresponding element is in ht 01046 for ( const_iterator it = begin(); it != end(); ++it ) { 01047 const_iterator it2 = ht.find(get_key(*it)); 01048 if ((it2 == ht.end()) || (*it != *it2)) { 01049 return false; 01050 } 01051 } 01052 return true; 01053 } 01054 } 01055 bool operator!=(const dense_hashtable& ht) const { 01056 return !(*this == ht); 01057 } 01058 01059 01060 // I/O 01061 // We support reading and writing hashtables to disk. Alas, since 01062 // I don't know how to write a hasher or key_equal, you have to make 01063 // sure everything but the table is the same. We compact before writing 01064 // 01065 // NOTE: These functions are currently TODO. They've not been implemented. 
01066 bool write_metadata(FILE * /*fp*/) { 01067 squash_deleted(); // so we don't have to worry about delkey 01068 return false; // TODO 01069 } 01070 01071 bool read_metadata(FILE* /*fp*/) { 01072 num_deleted = 0; // since we got rid before writing 01073 assert(settings.use_empty() && "empty_key not set for read_metadata"); 01074 if (table) val_info.deallocate(table, num_buckets); // we'll make our own 01075 // TODO: read magic number 01076 // TODO: read num_buckets 01077 settings.reset_thresholds(bucket_count()); 01078 table = val_info.allocate(num_buckets); 01079 assert(table); 01080 fill_range_with_empty(table, table + num_buckets); 01081 // TODO: read num_elements 01082 for ( size_type i = 0; i < num_elements; ++i ) { 01083 // TODO: read bucket_num 01084 // TODO: set with non-empty, non-deleted value 01085 } 01086 return false; // TODO 01087 } 01088 01089 // If your keys and values are simple enough, we can write them to 01090 // disk for you. "simple enough" means value_type is a POD type 01091 // that contains no pointers. 
However, we don't try to normalize 01092 // endianness 01093 bool write_nopointer_data(FILE *fp) const { 01094 for ( const_iterator it = begin(); it != end(); ++it ) { 01095 // TODO: skip empty/deleted values 01096 if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false; 01097 } 01098 return false; 01099 } 01100 01101 // When reading, we have to override the potential const-ness of *it 01102 bool read_nopointer_data(FILE *fp) { 01103 for ( iterator it = begin(); it != end(); ++it ) { 01104 // TODO: skip empty/deleted values 01105 if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) ) 01106 return false; 01107 } 01108 return false; 01109 } 01110 01111 private: 01112 template <class A> 01113 class alloc_impl : public A { 01114 public: 01115 typedef typename A::pointer pointer; 01116 typedef typename A::size_type size_type; 01117 01118 // Convert a normal allocator to one that has realloc_or_die() 01119 alloc_impl(const A& a) : A(a) { } 01120 01121 // realloc_or_die should only be used when using the default 01122 // allocator (libc_allocator_with_realloc). 01123 pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) { 01124 fprintf(stderr, "realloc_or_die is only supported for " 01125 "libc_allocator_with_realloc"); 01126 exit(1); 01127 return NULL; 01128 } 01129 }; 01130 01131 // A template specialization of alloc_impl for 01132 // libc_allocator_with_realloc that can handle realloc_or_die. 
01133 template <class A> 01134 class alloc_impl<libc_allocator_with_realloc<A> > 01135 : public libc_allocator_with_realloc<A> { 01136 public: 01137 typedef typename libc_allocator_with_realloc<A>::pointer pointer; 01138 typedef typename libc_allocator_with_realloc<A>::size_type size_type; 01139 01140 alloc_impl(const libc_allocator_with_realloc<A>& a) 01141 : libc_allocator_with_realloc<A>(a) { } 01142 01143 pointer realloc_or_die(pointer ptr, size_type n) { 01144 pointer retval = this->reallocate(ptr, n); 01145 if (retval == NULL) { 01146 // We really should use PRIuS here, but I don't want to have to add 01147 // a whole new configure option, with concomitant macro namespace 01148 // pollution, just to print this (unlikely) error message. So I cast. 01149 fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate " 01150 "%lu elements for ptr %p", 01151 static_cast<unsigned long>(n), ptr); 01152 exit(1); 01153 } 01154 return retval; 01155 } 01156 }; 01157 01158 // Package allocator with emptyval to eliminate memory needed for 01159 // the zero-size allocator. 01160 // If new fields are added to this class, we should add them to 01161 // operator= and swap. 01162 class ValInfo : public alloc_impl<value_alloc_type> { 01163 public: 01164 typedef typename alloc_impl<value_alloc_type>::value_type value_type; 01165 01166 ValInfo(const alloc_impl<value_alloc_type>& a) 01167 : alloc_impl<value_alloc_type>(a), emptyval() { } 01168 ValInfo(const ValInfo& v) 01169 : alloc_impl<value_alloc_type>(v), emptyval(v.emptyval) { } 01170 01171 value_type emptyval; // which key marks unused entries 01172 }; 01173 01174 01175 // Package functors with another class to eliminate memory needed for 01176 // zero-size functors. Since ExtractKey and hasher's operator() might 01177 // have the same function signature, they must be packaged in 01178 // different classes. 
  // Bundles the hasher with the resize/shrink bookkeeping (occupancy
  // thresholds, consider_shrink flag) via empty-base optimization.
  struct Settings :
      sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS> {
    explicit Settings(const hasher& hf)
        : sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS>(
            hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };

  // Packages ExtractKey and SetKey functors.
  class KeyInfo : public ExtractKey, public SetKey, public key_equal {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const key_equal& eq)
        : ExtractKey(ek),
          SetKey(sk),
          key_equal(eq) {
    }

    // We want to return the exact same type as ExtractKey: Key or const Key&
    typename ExtractKey::result_type get_key(const_reference v) const {
      return ExtractKey::operator()(v);
    }
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    bool equals(const key_type& a, const key_type& b) const {
      return key_equal::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
    typename remove_const<key_type>::type delkey;
  };

  // Utility functions to access the templated operators
  size_type hash(const key_type& v) const {
    return settings.hash(v);
  }
  bool equals(const key_type& a, const key_type& b) const {
    return key_info.equals(a, b);
  }
  typename ExtractKey::result_type get_key(const_reference v) const {
    return key_info.get_key(v);
  }
  void set_key(pointer v, const key_type& k) const {
    key_info.set_key(v, k);
  }

 private:
  // Actual data
  Settings settings;       // hasher + occupancy thresholds + shrink flag
  KeyInfo key_info;        // ExtractKey/SetKey/key_equal + delkey

  size_type num_deleted;   // how many occupied buckets are marked deleted
  size_type num_elements;  // number of live (non-deleted) elements
  size_type num_buckets;   // always a power of two
  ValInfo val_info;        // holds emptyval, and also the allocator
  pointer table;           // the bucket array (NULL until emptyval is set)
};


// We need a global swap as well
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(dense_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
                 dense_hashtable<V,K,HF,ExK,SetK,EqK,A> &y) {
  x.swap(y);
}

#undef JUMP_

// Out-of-class definition of the static const member: required so that
// ILLEGAL_BUCKET can be odr-used (e.g. bound to a reference).
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;

// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory.
// However, we go with .5, getting better performance at the cost of
// more space (a trade-off densehashtable explicitly chooses to make).
// Feel free to play around with different values, though.
01256 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> 01257 const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 50; 01258 01259 // How empty we let the table get before we resize lower. 01260 // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing 01261 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> 01262 const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT 01263 = static_cast<int>(0.4 * 01264 dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT); 01265 01266 _END_GOOGLE_NAMESPACE_ 01267 01268 #endif /* _DENSEHASHTABLE_H_ */