Sierra Toolkit
Version of the Day
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Craig Silverstein
//
// A sparse hashtable is a particular implementation of
// a hashtable: one that is meant to minimize memory use.
// It does this by using a *sparse table* (cf sparsetable.h),
// which uses between 1 and 2 bits to store empty buckets
// (we may need another bit for hashtables that support deletion).
//
// When empty buckets are so cheap, an appealing hashtable
// implementation is internal probing, in which the hashtable
// is a single table, and collisions are resolved by trying
// to insert again in another bucket.  The most cache-efficient
// internal probing schemes are linear probing (which suffers,
// alas, from clumping) and quadratic probing, which is what
// we implement by default.
//
// Deleted buckets are a bit of a pain.  We have to somehow mark
// deleted buckets (the probing must distinguish them from empty
// buckets).  The most principled way is to have another bitmap,
// but that's annoying and takes up space.  Instead we let the
// user specify an "impossible" key.  We set deleted buckets
// to have the impossible key.
//
// Note it is possible to change the value of the delete key
// on the fly; you can even remove it, though after that point
// the hashtable is insert_only until you set it again.
//
// You probably shouldn't use this code directly.  Use
// <google/sparse_hash_map> or <google/sparse_hash_set> instead.
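//
// For illustration only (hypothetical user code, going through the public
// sparse_hash_set wrapper rather than this class directly), the deleted-key
// mechanism described above is used roughly like this:
//
//    google::sparse_hash_set<string> s;
//    s.set_deleted_key("");     // "" must never be inserted as a real key
//    s.insert("alpha");
//    s.erase("alpha");          // writes the deleted key into that bucket
//    s.clear_deleted_key();     // purges deleted buckets; the table is now
//                               // insert-only until a deleted key is set again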
//
// You can modify the following, below:
// HT_OCCUPANCY_PCT            -- how full before we double size
// HT_EMPTY_PCT                -- how empty before we halve size
// HT_MIN_BUCKETS              -- smallest bucket size
// HT_DEFAULT_STARTING_BUCKETS -- default bucket size at construct-time
//
// You can also change enlarge_factor (which defaults to
// HT_OCCUPANCY_PCT), and shrink_factor (which defaults to
// HT_EMPTY_PCT) with set_resizing_parameters().
//
// How to decide what values to use?
// shrink_factor's default of .4 * OCCUPANCY_PCT is probably good.
// HT_MIN_BUCKETS is probably unnecessary since you can specify
// (indirectly) the starting number of buckets at construct-time.
// For enlarge_factor, you can use this chart to trade off expected
// lookup time against the space taken up.  By default, this
// code uses quadratic probing, though you can change it to linear
// via JUMP_ below if you really want to.
//
// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
// NUMBER OF PROBES / LOOKUP        Successful           Unsuccessful
// Quadratic collision resolution   1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
// Linear collision resolution      [1+1/(1-L)]/2        [1+1/((1-L)^2)]/2
//
// -- enlarge_factor --           0.10  0.50  0.60  0.75  0.80  0.90  0.99
// QUADRATIC COLLISION RES.
//    probes/successful lookup    1.05  1.44  1.62  2.01  2.21  2.85  5.11
//    probes/unsuccessful lookup  1.11  2.19  2.82  4.64  5.81  11.4  103.6
// LINEAR COLLISION RES.
//    probes/successful lookup    1.06  1.5   1.75  2.5   3.0   5.5   50.5
//    probes/unsuccessful lookup  1.12  2.5   3.6   8.5   13.0  50.0  5000.0
//
// The value type is required to be copy constructible and default
// constructible, but it need not be (and commonly isn't) assignable.

#ifndef _SPARSEHASHTABLE_H_
#define _SPARSEHASHTABLE_H_

#ifndef SPARSEHASH_STAT_UPDATE
#define SPARSEHASH_STAT_UPDATE(x) ((void) 0)
#endif

// The probing method
// Linear probing
// #define JUMP_(key, num_probes)    ( 1 )
// Quadratic probing
#define JUMP_(key, num_probes)    ( num_probes )

#include <stk_util/util/sparseconfig.h>
#include <assert.h>
#include <algorithm>                  // For swap(), eg
#include <stdexcept>                  // For length_error
#include <iterator>                   // for facts about iterator tags
#include <limits>                     // for numeric_limits<>
#include <utility>                    // for pair<>
#include <stk_util/util/hashtable-common.h>
#include <stk_util/util/sparsetable>  // Since that's basically what we are

_START_GOOGLE_NAMESPACE_

using STL_NAMESPACE::pair;

// The smaller this is, the faster lookup is (because the group bitmap is
// smaller) and the faster insert is, because there's less to move.
// On the other hand, there are more groups.  Since group::size_type is
// a short, this number should be of the form 32*x + 16 to avoid waste.
static const u_int16_t DEFAULT_GROUP_SIZE = 48;   // fits in 1.5 words

// Hashtable class, used to implement the hashed associative containers
// hash_set and hash_map.
//
// Value: what is stored in the table (each bucket is a Value).
// Key: something in a 1-to-1 correspondence to a Value, that can be used
//      to search for a Value in the table (find() takes a Key).
// HashFcn: Takes a Key and returns an integer, the more unique the better.
// ExtractKey: given a Value, returns the unique Key associated with it.
//             Must inherit from unary_function, or at least have a
//             result_type enum indicating the return type of operator().
// SetKey: given a Value* and a Key, modifies the value such that
//         ExtractKey(value) == key.  We guarantee this is only called
//         with key == deleted_key.
// EqualKey: Given two Keys, says whether they are the same (that is,
//           if they are both associated with the same Value).
// Alloc: STL allocator to use to allocate memory.

template <class Value, class Key, class HashFcn,
          class ExtractKey, class SetKey, class EqualKey, class Alloc>
class sparse_hashtable;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_iterator;

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_const_iterator;

// As far as iterating, we're basically just a sparsetable
// that skips over deleted elements.
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_iterator {
 private:
  typedef typename A::template rebind<V>::other value_alloc_type;

 public:
  typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
  typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
  typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::nonempty_iterator
      st_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_iterator(const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
                            st_iterator it, st_iterator it_end)
      : ht(h), pos(it), end(it_end)   { advance_past_deleted(); }
  sparse_hashtable_iterator() { }      // not ever used internally
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const  { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while ( pos != end && ht->test_deleted(*this) )
      ++pos;
  }
  iterator& operator++() {
    assert(pos != end); ++pos; advance_past_deleted(); return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }


  // The actual data
  const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
  st_iterator pos, end;
};
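// For illustration only: a hypothetical set-like instantiation (Value == Key)
// could fill in the template parameters documented above roughly as follows
// (the real traits live in the sparse_hash_set/sparse_hash_map wrappers):
//
//    struct Identity {        // ExtractKey: a value is its own key
//      typedef const int& result_type;
//      const int& operator()(const int& v) const { return v; }
//    };
//    struct SetIntKey {       // SetKey: overwrite a value with the given key
//      void operator()(int* value, const int& key) const { *value = key; }
//    };
//    typedef sparse_hashtable<int, int, SomeIntHasher, Identity, SetIntKey,
//                             std::equal_to<int>, std::allocator<int> >
//        ExampleIntTable;     // SomeIntHasher: any hash functor for int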
// Now do it all again, but with const-ness!
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_const_iterator {
 private:
  typedef typename A::template rebind<V>::other value_alloc_type;

 public:
  typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
  typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
  typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::const_nonempty_iterator
      st_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::const_reference reference;
  typedef typename value_alloc_type::const_pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_const_iterator(const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
                                  st_iterator it, st_iterator it_end)
      : ht(h), pos(it), end(it_end)   { advance_past_deleted(); }
  // This lets us convert regular iterators to const iterators
  sparse_hashtable_const_iterator() { }      // never used internally
  sparse_hashtable_const_iterator(const iterator &it)
      : ht(it.ht), pos(it.pos), end(it.end) { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const  { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while ( pos != end && ht->test_deleted(*this) )
      ++pos;
  }
  const_iterator& operator++() {
    assert(pos != end); ++pos; advance_past_deleted(); return *this;
  }
  const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const const_iterator& it) const { return pos == it.pos; }
  bool operator!=(const const_iterator& it) const { return pos != it.pos; }


  // The actual data
  const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
  st_iterator pos, end;
};

// And once again, but this time freeing up memory as we iterate
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
struct sparse_hashtable_destructive_iterator {
 private:
  typedef typename A::template rebind<V>::other value_alloc_type;

 public:
  typedef sparse_hashtable_destructive_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
  typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::destructive_iterator
      st_iterator;

  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
  typedef V value_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::pointer pointer;

  // "Real" constructor and default constructor
  sparse_hashtable_destructive_iterator(const
                                        sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
                                        st_iterator it, st_iterator it_end)
      : ht(h), pos(it), end(it_end)   { advance_past_deleted(); }
  sparse_hashtable_destructive_iterator() { }          // never used internally
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *pos; }
  pointer operator->() const  { return &(operator*()); }

  // Arithmetic.  The only hard part is making sure that
  // we're not on a marked-deleted array element
  void advance_past_deleted() {
    while ( pos != end && ht->test_deleted(*this) )
      ++pos;
  }
  iterator& operator++() {
    assert(pos != end); ++pos; advance_past_deleted(); return *this;
  }
  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }

  // Comparison.
  bool operator==(const iterator& it) const { return pos == it.pos; }
  bool operator!=(const iterator& it) const { return pos != it.pos; }


  // The actual data
  const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *ht;
  st_iterator pos, end;
};


template <class Value, class Key, class HashFcn,
          class ExtractKey, class SetKey, class EqualKey, class Alloc>
class sparse_hashtable {
 private:
  typedef typename Alloc::template rebind<Value>::other value_alloc_type;

 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;
  typedef Alloc allocator_type;

  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;
  typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey,
                                    SetKey, EqualKey, Alloc>
      iterator;

  typedef sparse_hashtable_const_iterator<Value, Key, HashFcn, ExtractKey,
                                          SetKey, EqualKey, Alloc>
      const_iterator;

  typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn, ExtractKey,
                                                SetKey, EqualKey, Alloc>
      destructive_iterator;

  // These come from tr1.  For us they're the same as regular iterators.
  typedef iterator local_iterator;
  typedef const_iterator const_local_iterator;

  // How full we let the table get before we resize, by default.
  // Knuth says .8 is good -- higher causes us to probe too much,
  // though it saves memory.
  static const int HT_OCCUPANCY_PCT; // = 80 (out of 100);

  // How empty we let the table get before we resize lower, by default.
  // (0.0 means never resize lower.)
  // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
  static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT;

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the initial size is a
  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_type HT_MIN_BUCKETS = 4;

  // By default, if you don't specify a hashtable size at
  // construction-time, we use this size.  Must be a power of two, and
  // at least HT_MIN_BUCKETS.
  static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;

  // ITERATOR FUNCTIONS
  iterator begin()             { return iterator(this, table.nonempty_begin(),
                                                 table.nonempty_end()); }
  iterator end()               { return iterator(this, table.nonempty_end(),
                                                 table.nonempty_end()); }
  const_iterator begin() const { return const_iterator(this,
                                                       table.nonempty_begin(),
                                                       table.nonempty_end()); }
  const_iterator end() const   { return const_iterator(this,
                                                       table.nonempty_end(),
                                                       table.nonempty_end()); }

  // These come from tr1 unordered_map.  They iterate over 'bucket' n.
  // For sparsehashtable, we could consider each 'group' to be a bucket,
  // I guess, but I don't really see the point.
  // We'll just consider bucket n to be the n-th element of the sparsetable,
  // if it's occupied, or some empty element, otherwise.
  local_iterator begin(size_type i) {
    if (table.test(i))
      return local_iterator(this, table.get_iter(i), table.nonempty_end());
    else
      return local_iterator(this, table.nonempty_end(), table.nonempty_end());
  }
  local_iterator end(size_type i) {
    local_iterator it = begin(i);
    if (table.test(i) && !test_deleted(i))
      ++it;
    return it;
  }
  const_local_iterator begin(size_type i) const {
    if (table.test(i))
      return const_local_iterator(this, table.get_iter(i),
                                  table.nonempty_end());
    else
      return const_local_iterator(this, table.nonempty_end(),
                                  table.nonempty_end());
  }
  const_local_iterator end(size_type i) const {
    const_local_iterator it = begin(i);
    if (table.test(i) && !test_deleted(i))
      ++it;
    return it;
  }

  // This is used when resizing
  destructive_iterator destructive_begin() {
    return destructive_iterator(this, table.destructive_begin(),
                                table.destructive_end());
  }
  destructive_iterator destructive_end() {
    return destructive_iterator(this, table.destructive_end(),
                                table.destructive_end());
  }


  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  hasher hash_funct() const            { return settings; }
  key_equal key_eq() const             { return key_info; }
  allocator_type get_allocator() const { return table.get_allocator(); }

  // Accessor function for statistics gathering.
  int num_table_copies() const { return settings.num_ht_copies(); }

 private:
  // We need to copy values when we set the special marker for deleted
  // elements, but, annoyingly, we can't just use the copy assignment
  // operator because value_type might not be assignable (it's often
  // pair<const X, Y>).  We use explicit destructor invocation and
  // placement new to get around this.  Arg.
  void set_value(pointer dst, const_reference src) {
    dst->~value_type();   // delete the old value, if any
    new(dst) value_type(src);
  }

  // This is used as a tag for the copy constructor, saying to destroy its
  // arg.  We have two ways of destructively copying: with potentially growing
  // the hashtable as we copy, and without.  To make sure the outside world
  // can't do a destructive copy, we make the typename private.
  enum MoveDontCopyT {MoveDontCopy, MoveDontGrow};

  // DELETE HELPER FUNCTIONS
  // This lets the user describe a key that will indicate deleted
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 private:
  void squash_deleted() {           // gets rid of any deleted entries we have
    if ( num_deleted ) {            // get rid of deleted before writing
      sparse_hashtable tmp(MoveDontGrow, *this);
      swap(tmp);                    // now we are tmp
    }
    assert(num_deleted == 0);
  }

  bool test_deleted_key(const key_type& key) const {
    // The num_deleted test is crucial for read(): after read(), the ht values
    // are garbage, and we don't want to think some of them are deleted.
    // Invariant: !use_deleted implies num_deleted is 0.
    assert(settings.use_deleted() || num_deleted == 0);
    return num_deleted > 0 && equals(key_info.delkey, key);
  }

 public:
  void set_deleted_key(const key_type &key) {
    // It's only safe to change what "deleted" means if we purge deleted guys
    squash_deleted();
    settings.set_use_deleted(true);
    key_info.delkey = key;
  }
  void clear_deleted_key() {
    squash_deleted();
    settings.set_use_deleted(false);
  }
  key_type deleted_key() const {
    assert(settings.use_deleted()
           && "Must set deleted key before calling deleted_key");
    return key_info.delkey;
  }

  // These are public so the iterators can use them
  // True if the item at position bucknum is "deleted" marker
  bool test_deleted(size_type bucknum) const {
    if (num_deleted == 0 || !table.test(bucknum)) return false;
    return test_deleted_key(get_key(table.unsafe_get(bucknum)));
  }
  bool test_deleted(const iterator &it) const {
    if (!settings.use_deleted()) return false;
    return test_deleted_key(get_key(*it));
  }
  bool test_deleted(const const_iterator &it) const {
    if (!settings.use_deleted()) return false;
    return test_deleted_key(get_key(*it));
  }
  bool test_deleted(const destructive_iterator &it) const {
    if (!settings.use_deleted()) return false;
    return test_deleted_key(get_key(*it));
  }

 private:
  // Set it so test_deleted is true.  true if object didn't used to be deleted.
  // TODO(csilvers): make these private (also in densehashtable.h)
  bool set_deleted(iterator &it) {
    assert(settings.use_deleted());
    bool retval = !test_deleted(it);
    // &* converts from iterator to value-type.
    set_key(&(*it), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  true if object used to be deleted.
  bool clear_deleted(iterator &it) {
    assert(settings.use_deleted());
    // Happens automatically when we assign something else in its place.
    return test_deleted(it);
  }

  // We also allow to set/clear the deleted bit on a const iterator.
  // We allow a const_iterator for the same reason you can delete a
  // const pointer: it's convenient, and semantically you can't use
  // 'it' after it's been deleted anyway, so its const-ness doesn't
  // really matter.
  bool set_deleted(const_iterator &it) {
    assert(settings.use_deleted());   // bad if set_deleted_key() wasn't called
    bool retval = !test_deleted(it);
    set_key(const_cast<pointer>(&(*it)), key_info.delkey);
    return retval;
  }
  // Set it so test_deleted is false.  true if object used to be deleted.
  bool clear_deleted(const_iterator &it) {
    assert(settings.use_deleted());   // bad if set_deleted_key() wasn't called
    return test_deleted(it);
  }

  // FUNCTIONS CONCERNING SIZE
 public:
  size_type size() const             { return table.num_nonempty() - num_deleted; }
  size_type max_size() const         { return table.max_size(); }
  bool empty() const                 { return size() == 0; }
  size_type bucket_count() const     { return table.size(); }
  size_type max_bucket_count() const { return max_size(); }
  // These are tr1 methods.  Their idea of 'bucket' doesn't map well to
  // what we do.  We just say every bucket has 0 or 1 items in it.
  size_type bucket_size(size_type i) const {
    return begin(i) == end(i) ? 0 : 1;
  }

 private:
  // Because of the above, size_type(-1) is never legal; use it for errors
  static const size_type ILLEGAL_BUCKET = size_type(-1);

  // Used after a string of deletes.  Returns true if we actually shrunk.
  // TODO(csilvers): take a delta so we can take into account inserts
  // done after shrinking.  Maybe make part of the Settings class?
  bool maybe_shrink() {
    assert(table.num_nonempty() >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0);  // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
    bool retval = false;

    // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
    // we'll never shrink until you get relatively big, and we'll never
    // shrink below HT_DEFAULT_STARTING_BUCKETS.  Otherwise, something
    // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
    // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
    const size_type num_remain = table.num_nonempty() - num_deleted;
    const size_type shrink_threshold = settings.shrink_threshold();
    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
      const float shrink_factor = settings.shrink_factor();
      size_type sz = bucket_count() / 2;    // find how much we should shrink
      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
             num_remain < static_cast<size_type>(sz * shrink_factor)) {
        sz /= 2;                            // stay a power of 2
      }
      sparse_hashtable tmp(MoveDontCopy, *this, sz);
      swap(tmp);                            // now we are tmp
      retval = true;
    }
    settings.set_consider_shrink(false);    // because we just considered it
    return retval;
  }

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  // Returns true if we actually resized, false if size was already ok.
  bool resize_delta(size_type delta) {
    bool did_resize = false;
    if ( settings.consider_shrink() ) {  // see if lots of deletes happened
      if ( maybe_shrink() )
        did_resize = true;
    }
    if (table.num_nonempty() >=
        (STL_NAMESPACE::numeric_limits<size_type>::max)() - delta)
      throw std::length_error("resize overflow");
    if ( bucket_count() >= HT_MIN_BUCKETS &&
         (table.num_nonempty() + delta) <= settings.enlarge_threshold() )
      return did_resize;                  // we're ok as we are

    // Sometimes, we need to resize just to get rid of all the
    // "deleted" buckets that are clogging up the hashtable.  So when
    // deciding whether to resize, count the deleted buckets (which
    // are currently taking up room).  But later, when we decide what
    // size to resize to, *don't* count deleted buckets, since they
    // get discarded during the resize.
    const size_type needed_size =
        settings.min_buckets(table.num_nonempty() + delta, 0);
    if ( needed_size <= bucket_count() )    // we have enough buckets
      return did_resize;

    size_type resize_to =
        settings.min_buckets(table.num_nonempty() - num_deleted + delta,
                             bucket_count());
    if (resize_to < needed_size &&          // may double resize_to
        resize_to < (STL_NAMESPACE::numeric_limits<size_type>::max)() / 2) {
      // This situation means that we have enough deleted elements,
      // that once we purge them, we won't actually have needed to
      // grow.  But we may want to grow anyway: if we just purge one
      // element, say, we'll have to grow anyway next time we
      // insert.  Might as well grow now, since we're already going
      // through the trouble of copying (in order to purge the
      // deleted elements).
      const size_type target =
          static_cast<size_type>(settings.shrink_size(resize_to*2));
      if (table.num_nonempty() - num_deleted + delta >= target) {
        // Good, we won't be below the shrink threshold even if we double.
        resize_to *= 2;
      }
    }

    sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
    swap(tmp);                              // now we are tmp
    return true;
  }

  // Used to actually do the rehashing when we grow/shrink a hashtable
  void copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted) {
    clear();            // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    const size_type resize_to =
        settings.min_buckets(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {  // we don't have enough buckets
      table.resize(resize_to);           // sets the number of buckets
      settings.reset_thresholds(bucket_count());
    }

    // We use a normal iterator to get non-deleted buckets from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count()-1)) == 0);      // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;          // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           table.test(bucknum);          // not empty
           bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count()
               && "Hashtable is full: an error in key_equal<> or hash<>");
      }
      table.set(bucknum, *it);           // copies the value to here
    }
    settings.inc_num_ht_copies();
  }
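  // To make the probe loop in copy_from() (and in find_position() below)
  // concrete: with the default quadratic JUMP_(key, num_probes) == num_probes,
  // a key whose hash lands on bucket h is probed at offsets
  //    h, h+1, h+1+2, h+1+2+3, ...  i.e. h + k*(k+1)/2 after k probes,
  // all masked with bucket_count()-1 (a power of two); the linear variant
  // (JUMP_ == 1) would probe h, h+1, h+2, ... instead.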
  // Implementation is like copy_from, but it destroys the table of the
  // "from" guy by freeing sparsetable memory as we iterate.  This is
  // useful in resizing, since we're throwing away the "from" guy anyway.
  void move_from(MoveDontCopyT mover, sparse_hashtable &ht,
                 size_type min_buckets_wanted) {
    clear();            // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    size_type resize_to;
    if ( mover == MoveDontGrow )
      resize_to = ht.bucket_count();       // keep same size as old ht
    else                                   // MoveDontCopy
      resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {    // we don't have enough buckets
      table.resize(resize_to);             // sets the number of buckets
      settings.reset_thresholds(bucket_count());
    }

    // We use a normal iterator to get non-deleted buckets from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert( (bucket_count() & (bucket_count()-1)) == 0);     // a power of two
    // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
    for ( destructive_iterator it = ht.destructive_begin();
          it != ht.destructive_end(); ++it ) {
      size_type num_probes = 0;            // how many times we've probed
      size_type bucknum;
      for ( bucknum = hash(get_key(*it)) & (bucket_count()-1);  // h % buck_cnt
            table.test(bucknum);                                // not empty
            bucknum = (bucknum + JUMP_(key, num_probes)) & (bucket_count()-1) ) {
        ++num_probes;
        assert(num_probes < bucket_count()
               && "Hashtable is full: an error in key_equal<> or hash<>");
      }
      table.set(bucknum, *it);             // copies the value to here
    }
    settings.inc_num_ht_copies();
  }


  // Required by the spec for hashed associative container
 public:
  // Though the docs say this should be num_buckets, I think it's much
  // more useful as num_elements.  As a special feature, calling with
  // req_elements==0 will cause us to shrink if we can, saving space.
  void resize(size_type req_elements) {       // resize to this or larger
    if ( settings.consider_shrink() || req_elements == 0 )
      maybe_shrink();
    if ( req_elements > table.num_nonempty() )  // we only grow
      resize_delta(req_elements - table.num_nonempty());
  }

  // Get and change the value of shrink_factor and enlarge_factor.  The
  // description at the beginning of this file explains how to choose
  // the values.  Setting the shrink parameter to 0.0 ensures that the
  // table never shrinks.
  void get_resizing_parameters(float* shrink, float* grow) const {
    *shrink = settings.shrink_factor();
    *grow = settings.enlarge_factor();
  }
  void set_resizing_parameters(float shrink, float grow) {
    settings.set_resizing_parameters(shrink, grow);
    settings.reset_thresholds(bucket_count());
  }

  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor.  We also define a copy constructor and =.
  // DESTRUCTOR -- the default is fine, surprisingly.
  explicit sparse_hashtable(size_type expected_max_items_in_table = 0,
                            const HashFcn& hf = HashFcn(),
                            const EqualKey& eql = EqualKey(),
                            const ExtractKey& ext = ExtractKey(),
                            const SetKey& set = SetKey(),
                            const Alloc& alloc = Alloc())
      : settings(hf),
        key_info(ext, set, eql),
        num_deleted(0),
        table((expected_max_items_in_table == 0
               ? HT_DEFAULT_STARTING_BUCKETS
               : settings.min_buckets(expected_max_items_in_table, 0)),
              alloc) {
    settings.reset_thresholds(bucket_count());
  }

  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht.
  // We also provide a mechanism of saying you want to "move" the ht argument
  // into us instead of copying.
  sparse_hashtable(const sparse_hashtable& ht,
                   size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        table(0, ht.get_allocator()) {
    settings.reset_thresholds(bucket_count());
    copy_from(ht, min_buckets_wanted);   // copy_from() ignores deleted entries
  }
  sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht,
                   size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
      : settings(ht.settings),
        key_info(ht.key_info),
        num_deleted(0),
        table(0, ht.get_allocator()) {
    settings.reset_thresholds(bucket_count());
    move_from(mover, ht, min_buckets_wanted);  // ignores deleted entries
  }

  sparse_hashtable& operator= (const sparse_hashtable& ht) {
    if (&ht == this) return *this;       // don't copy onto ourselves
    settings = ht.settings;
    key_info = ht.key_info;
    num_deleted = ht.num_deleted;
    // copy_from() calls clear and sets num_deleted to 0 too
    copy_from(ht, HT_MIN_BUCKETS);
    // we purposefully don't copy the allocator, which may not be copyable
    return *this;
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparse_hashtable& ht) {
    STL_NAMESPACE::swap(settings, ht.settings);
    STL_NAMESPACE::swap(key_info, ht.key_info);
    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
    table.swap(ht.table);
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    if (!empty() || (num_deleted != 0)) {
      table.clear();
    }
    settings.reset_thresholds(bucket_count());
    num_deleted = 0;
  }

  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
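  // For example: if a key hashes to bucket 5, bucket 5 holds a deleted
  // marker, bucket 6 (the next probe) holds some other key, and bucket 8
  // (the probe after that) is empty, we return (ILLEGAL_BUCKET, 5): the key
  // is absent, and an insert would reuse the deleted slot at 5 rather than
  // the empty bucket at 8.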
  pair<size_type, size_type> find_position(const key_type &key) const {
    size_type num_probes = 0;              // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
    SPARSEHASH_STAT_UPDATE(total_lookups += 1);
    while ( 1 ) {                          // probe until something happens
      if ( !table.test(bucknum) ) {        // bucket is empty
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        if ( insert_pos == ILLEGAL_BUCKET )  // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if ( test_deleted(bucknum) ) {  // keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;

      } else if ( equals(key, get_key(table.unsafe_get(bucknum))) ) {
        SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                        // we're doing another probe
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count()
             && "Hashtable is full: an error in key_equal<> or hash<>");
    }
  }

 public:
  iterator find(const key_type& key) {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
      return end();
    else
      return iterator(this, table.get_iter(pos.first), table.nonempty_end());
  }

  const_iterator find(const key_type& key) const {
    if ( size() == 0 ) return end();
    pair<size_type, size_type> pos = find_position(key);
    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
      return end();
    else
      return const_iterator(this,
                            table.get_iter(pos.first), table.nonempty_end());
  }

  // This is a tr1 method: the bucket a given key is in, or what bucket
  // it would be put in, if it were to be inserted.  Shrug.
  size_type bucket(const key_type& key) const {
    pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first;
  }

  // Counts how many elements have key key.  For maps, it's either 0 or 1.
  size_type count(const key_type &key) const {
    pair<size_type, size_type> pos = find_position(key);
    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
  }
  // Likewise, equal_range doesn't really make sense for us.  Oh well.
  pair<iterator,iterator> equal_range(const key_type& key) {
    iterator pos = find(key);      // either an iterator or end
    if (pos == end()) {
      return pair<iterator,iterator>(pos, pos);
    } else {
      const iterator startpos = pos++;
      return pair<iterator,iterator>(startpos, pos);
    }
  }
  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
    const_iterator pos = find(key);      // either an iterator or end
    if (pos == end()) {
      return pair<const_iterator,const_iterator>(pos, pos);
    } else {
      const const_iterator startpos = pos++;
      return pair<const_iterator,const_iterator>(startpos, pos);
    }
  }


  // INSERTION ROUTINES
 private:
  // Private method used by insert_noresize and find_or_insert.
  iterator insert_at(const_reference obj, size_type pos) {
    if (size() >= max_size())
      throw std::length_error("insert overflow");
    if ( test_deleted(pos) ) {      // just replace if it's been deleted
      // The set() below will undelete this object.  We just worry about stats
      assert(num_deleted > 0);
      --num_deleted;                // used to be, now it isn't
    }
    table.set(pos, obj);
    return iterator(this, table.get_iter(pos), table.nonempty_end());
  }

  // If you know *this is big enough to hold obj, use this routine
  pair<iterator, bool> insert_noresize(const_reference obj) {
    // First, double-check we're not inserting delkey
    assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
      return pair<iterator,bool>(iterator(this, table.get_iter(pos.first),
                                          table.nonempty_end()),
                                 false);     // false: we didn't insert
    } else {                                 // pos.second says where to put it
      return pair<iterator,bool>(insert_at(obj, pos.second), true);
    }
  }

  // Specializations of insert(it, it) depending on the power of the iterator:
  // (1) Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l,
              STL_NAMESPACE::forward_iterator_tag) {
    size_t dist = STL_NAMESPACE::distance(f, l);
    if (dist >= (std::numeric_limits<size_type>::max)())
      throw std::length_error("insert-range overflow");
    resize_delta(static_cast<size_type>(dist));
    for ( ; dist > 0; --dist, ++f) {
      insert_noresize(*f);
    }
  }

  // (2) Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l,
              STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f)
      insert(*f);
  }

 public:
  // This is the normal insert routine, used by the outside world
  pair<iterator, bool> insert(const_reference obj) {
    resize_delta(1);                      // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type
    insert(f, l,
           typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }
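  // For instance, insert(v.begin(), v.end()) on a std::vector<value_type>
  // dispatches to the forward-iterator overload above (one resize_delta()
  // up front, then insert_noresize() per element), while a pair of
  // std::istream_iterators dispatches to the input-iterator overload, which
  // cannot pre-size and simply calls insert() once per element.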
  // DefaultValue is a functor that takes a key and returns a value_type
  // representing the default value to be inserted if none is found.
  template <class DefaultValue>
  value_type& find_or_insert(const key_type& key) {
    // First, double-check we're not inserting delkey
    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
           && "Inserting the deleted key");
    const pair<size_type,size_type> pos = find_position(key);
    DefaultValue default_value;
    if ( pos.first != ILLEGAL_BUCKET) {  // object was already there
      return *table.get_iter(pos.first);
    } else if (resize_delta(1)) {        // needed to rehash to make room
      // Since we resized, we can't use pos, so recalculate where to insert.
      return *insert_noresize(default_value(key)).first;
    } else {                             // no need to rehash, insert right here
      return *insert_at(default_value(key), pos.second);
    }
  }

  // DELETION ROUTINES
  size_type erase(const key_type& key) {
    // First, double-check we're not erasing delkey.
    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
           && "Erasing the deleted key");
    assert(!settings.use_deleted() || !equals(key, key_info.delkey));
    const_iterator pos = find(key);   // shrug: shouldn't need to be const
    if ( pos != end() ) {
      assert(!test_deleted(pos));     // or find() shouldn't have returned it
      set_deleted(pos);
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
      return 1;                       // because we deleted one thing
    } else {
      return 0;                       // because we deleted nothing
    }
  }

  // We return the iterator past the deleted item.
  void erase(iterator pos) {
    if ( pos == end() ) return;       // sanity check
    if ( set_deleted(pos) ) {         // true if object has been newly deleted
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
    }
  }

  void erase(iterator f, iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )           // should always be true
        ++num_deleted;
    }
    // will think about shrink after next insert
    settings.set_consider_shrink(true);
  }

  // We allow you to erase a const_iterator just like we allow you to
  // erase an iterator.  This is in parallel to 'delete': you can delete
  // a const pointer just like a non-const pointer.  The logic is that
  // you can't use the object after it's erased anyway, so it doesn't matter
  // if it's const or not.
  void erase(const_iterator pos) {
    if ( pos == end() ) return;       // sanity check
    if ( set_deleted(pos) ) {         // true if object has been newly deleted
      ++num_deleted;
      // will think about shrink after next insert
      settings.set_consider_shrink(true);
    }
  }
  void erase(const_iterator f, const_iterator l) {
    for ( ; f != l; ++f) {
      if ( set_deleted(f) )           // should always be true
        ++num_deleted;
    }
    // will think about shrink after next insert
    settings.set_consider_shrink(true);
  }


  // COMPARISON
  bool operator==(const sparse_hashtable& ht) const {
    if (size() != ht.size()) {
      return false;
    } else if (this == &ht) {
      return true;
    } else {
      // Iterate through the elements in "this" and see if the
      // corresponding element is in ht
      for ( const_iterator it = begin(); it != end(); ++it ) {
        const_iterator it2 = ht.find(get_key(*it));
        if ((it2 == ht.end()) || (*it != *it2)) {
          return false;
        }
      }
      return true;
    }
  }
  bool operator!=(const sparse_hashtable& ht) const {
    return !(*this == ht);
  }


  // I/O
  // We support reading and writing hashtables to disk.  NOTE that
  // this only stores the hashtable metadata, not the stuff you've
  // actually put in the hashtable!  Alas, since I don't know how to
  // write a hasher or key_equal, you have to make sure everything
  // but the table is the same.  We compact before writing.
  bool write_metadata(FILE *fp) {
    squash_deleted();           // so we don't have to worry about delkey
    return table.write_metadata(fp);
  }

  bool read_metadata(FILE *fp) {
    num_deleted = 0;            // since we got rid before writing
    bool result = table.read_metadata(fp);
    settings.reset_thresholds(bucket_count());
    return result;
  }

  // Only meaningful if value_type is a POD.
  bool write_nopointer_data(FILE *fp) {
    return table.write_nopointer_data(fp);
  }

  // Only meaningful if value_type is a POD.
  bool read_nopointer_data(FILE *fp) {
    return table.read_nopointer_data(fp);
  }

 private:
  // Table is the main storage class.
  typedef sparsetable<value_type, DEFAULT_GROUP_SIZE, value_alloc_type> Table;

  // Package templated functors with the other types to eliminate memory
  // needed for storing these zero-size operators.  Since ExtractKey and
  // hasher's operator() might have the same function signature, they
  // must be packaged in different classes.
  struct Settings :
      sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS> {
    explicit Settings(const hasher& hf)
        : sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS>(
            hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
  };
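  // Note that this packaging leans on the empty base optimization: because
  // Settings and KeyInfo inherit from the (usually stateless) functors rather
  // than storing them as members, the functors normally add no size.  A
  // hypothetical sketch of the difference:
  //
  //    struct AsMembers { HashFcn hf; EqualKey eq; };  // sizeof >= 2 even if
  //                                                    // both are empty
  //    struct AsBases : HashFcn, EqualKey { };         // sizeof can stay 1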
  // KeyInfo stores delete key and packages zero-size functors:
  // ExtractKey and SetKey.
  class KeyInfo : public ExtractKey, public SetKey, public key_equal {
   public:
    KeyInfo(const ExtractKey& ek, const SetKey& sk, const key_equal& eq)
        : ExtractKey(ek),
          SetKey(sk),
          key_equal(eq) {
    }
    // We want to return the exact same type as ExtractKey: Key or const Key&
    typename ExtractKey::result_type get_key(const_reference v) const {
      return ExtractKey::operator()(v);
    }
    void set_key(pointer v, const key_type& k) const {
      SetKey::operator()(v, k);
    }
    bool equals(const key_type& a, const key_type& b) const {
      return key_equal::operator()(a, b);
    }

    // Which key marks deleted entries.
    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
    typename remove_const<key_type>::type delkey;
  };

  // Utility functions to access the templated operators
  size_type hash(const key_type& v) const {
    return settings.hash(v);
  }
  bool equals(const key_type& a, const key_type& b) const {
    return key_info.equals(a, b);
  }
  typename ExtractKey::result_type get_key(const_reference v) const {
    return key_info.get_key(v);
  }
  void set_key(pointer v, const key_type& k) const {
    key_info.set_key(v, k);
  }

 private:
  // Actual data
  Settings settings;
  KeyInfo key_info;
  size_type num_deleted;   // how many occupied buckets are marked deleted
  Table table;             // holds num_buckets and num_elements too
};


// We need a global swap as well
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
inline void swap(sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
                 sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &y) {
  x.swap(y);
}

#undef JUMP_

template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const typename sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
    sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;

// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 80;

// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
    = static_cast<int>(0.4 *
                       sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT);

_END_GOOGLE_NAMESPACE_

#endif /* _SPARSEHASHTABLE_H_ */