1 /++
2    This is just a very thin wrapper over dense_hash_table.d, just
3    like sgi stl's stl_hash_set is a very thin wrapper over
4    stl_hashtable.  The major thing we define is operator[], because
5    we have a concept of a data_type which stl_hashtable doesn't
6    \(it only has a key and a value\).
7 
8    This is more different from dense_hash_map than you might think,
9    because all iterators for sets are const \(you obviously can't
10    change the key, and for sets there is no value\).
11 
12    NOTE: this is exactly like sparse_hash_set.h, with the word
13    "sparse" replaced by "dense", except for the addition of
14    `setEmptyKey()`.
15 
16      YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION.
17 
18    Otherwise your program will die in mysterious ways.  \(Note if you
19    use the constructor that takes an InputIterator range, you pass in
20    the empty key in the constructor, rather than after.  As a result,
21    this constructor differs from the standard STL version.\)
22 
23    In other respects, we adhere mostly to the STL semantics for
24    hash-map.  One important exception is that `insert()` may invalidate
25    iterators entirely -- STL semantics are that `insert()` may reorder
26    iterators, but they all still refer to something valid in the
27    hashtable.  Not so for us.  Likewise, `insert()` may invalidate
28    pointers into the hashtable.  \(Whether insert invalidates iterators
29    and pointers depends on whether it results in a hashtable resize\).
30    On the plus side, `delete()` doesn't invalidate iterators or pointers
31    at all, or even change the ordering of elements.
32 
33    Here are a few "power user" tips:
34 
35       1. `set_deleted_key()`:
36            If you want to use erase() you must call set_deleted_key(),
37            in addition to set_empty_key(), after construction.
38            The deleted and empty keys must differ.
39 
40       2. `resize(0)`:
41            When an item is deleted, its memory isn't freed right
42            away.  This allows you to iterate over a hashtable,
43            and call erase(), without invalidating the iterator.
44            To force the memory to be freed, call resize(0).
45            For tr1 compatibility, this can also be called as rehash(0).
46 
47       3. `min_load_factor(0.0)`
48            Setting the minimum load factor to 0.0 guarantees that
49            the hash table will never shrink.
50 
51    Roughly speaking:
52      (1) dense_hash_set: fastest, uses the most memory unless entries are small
53      (2) sparse_hash_set: slowest, uses the least memory
54      (3) hash_set / unordered_set (STL): in the middle
55 
56    Typically I use sparse_hash_set when I care about space and/or when
57    I need to save the hashtable on disk.  I use hash_set otherwise.  I
58    don't personally use dense_hash_set ever; some people use it for
59    small sets with lots of lookups.
60 
61    - dense_hash_set has, typically, about 78% memory overhead \(if your
62      data takes up X bytes, the hash_set uses .78X more bytes in overhead\).
63    - sparse_hash_set has about 4 bits overhead per entry.
64    - sparse_hash_set can be 3-7 times slower than the others for lookup and,
65      especially, inserts.  See time_hash_map.cc for details.
66 
67    See /usr/(local/)?doc/sparsehash-*/dense_hash_set.html
68    for information about how to use this class.
69 
70    Copyright: (c) 2005, Google Inc.
71    All rights reserved.
72 
73    License:
74    Redistribution and use in source and binary forms, with or without
75    modification, are permitted provided that the following conditions are met:
76 
77    - Redistributions of source code must retain the above copyright
78    notice, this list of conditions and the following disclaimer.
79 
80    - Redistributions in binary form must reproduce the above
81    copyright notice, this list of conditions and the following disclaimer
82    in the documentation and/or other materials provided with the
83    distribution.
84 
85    - Neither the name of Google Inc. nor the names of its
86    contributors may be used to endorse or promote products derived from
87    this software without specific prior written permission.
88 
89    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
90    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
91    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
92    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
93    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
94    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \(INCLUDING, BUT NOT
95    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
96    DATA, OR PROFITS; OR BUSINESS INTERRUPTION\) HOWEVER CAUSED AND ON ANY
97    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
98    \(INCLUDING NEGLIGENCE OR OTHERWISE\) ARISING IN ANY WAY OUT OF THE USE
99    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
100 +/
101 module s2.util.container.dense_hash_set;
102 
103 import s2.util.container.dense_hash_table;
104 
105 /// Helper function for computing the hash of a basic type.
106 size_t hash(ValueT)(in ValueT value) {
107   static if (is(typeof(value.toHash()) : size_t)) {
108     return value.toHash();
109   } else {
110     return typeid(ValueT).getHash(&value);
111   }
112 }
113 
114 /// Helper function for comparing two values.
115 bool equalTo(ValueT)(in ValueT v1, in ValueT v2) {
116   return v1 == v2;
117 }
118 
119 /// Creates a DenseHashSet for a given type using default comparison functions.
120 auto denseHashSet(
121     Value,
122     HashFcn = size_t function(in Value),
123     EqualKey = bool function(in Value, in Value))(
124 
125     size_t expected_max_items_in_table = 0,
126     HashFcn hashFcn = (in Value v) => hash(v),
127     EqualKey equalKey = (in Value v1, in Value v2) => v1 == v2) {
128   return new DenseHashSet!(Value, HashFcn, EqualKey)(
129       expected_max_items_in_table, hashFcn, equalKey);
130 }
131 
132 /// A set implemented via a hash that very few empty slots.
133 class DenseHashSet(Value, HashFcn, EqualKey) {
134 public:
135   /// Apparently identity is not stl-standard, so we define our own
136   static Value identity(in Value v) {
137     return v;
138   }
139 
140   /// Key and Value are the same type in a DenseHashSet.
141   static void setKey(ref Value value, Value key) {
142     value = key;
143   }
144 
145   /// The actual data.
146   alias HashTable = DenseHashTable!(
147       Value, Value, HashFcn, Value function(in Value), void function(ref Value, Value), EqualKey);
148   HashTable rep;
149   alias rep this;
150 
151   alias KeyType = HashTable.KeyType;
152   alias ValueType = HashTable.ValueType;
153   alias Iterator = HashTable.Iterator;
154 
155   /// Basic constructor.
156   this(
157       size_t expected_max_items_in_table, HashFcn hashFcn, EqualKey equalKey) {
158     rep = new HashTable(expected_max_items_in_table, hashFcn, &identity, &setKey, equalKey);
159   }
160 
161   /**
162      A constructor based on interators which provide the values to add to the set.
163      With a DenseHashSet, the key and value types used for the DenseHashTable are the same.
164 
165      Params:
166      InputIterator = Compile-time type parameter of the iterators that support the "*" operator.
167      f = Iterator for the first value.
168      l = Iterator for the last value.
169      empty_key_val = An unused value that can be used to represent an "empty" hash slot.
170      expected_max_items_in_table = Sets an initial size to help avoid additional memory allocations.
171      hashFcn = A function for computin the hash of Value.
172      equalKey = A function to determine if two Values are equal.
173   */
174   this(InputIterator)(
175       InputIterator f,
176       InputIterator l,
177       Value empty_key_val,
178       size_t expected_max_items_in_table,
179       HashFcn hashFcn,
180       EqualKey equalKey)
181   if (is(typeof(*(InputIterator.init)) : Value)) {
182     rep = new HashTable(expected_max_items_in_table, hashFcn, &identity, &setKey, equalKey);
183     setEmptyKey(empty_key_val);
184     rep.insert(f, l);
185   }
186 
187   // We use the default copy constructor
188   // We use the default operator=()
189   // We use the default destructor
190 
191   /// These are tr1 methods.  bucket() is the bucket the key is or would be in.
192   float loadFactor() const {
193     return size() * 1.0f / bucketCount();
194   }
195 
196   float maxLoadFactor() const {
197     float shrink, grow;
198     rep.getResizingParameters(shrink, grow);
199     return grow;
200   }
201 
202   void maxLoadFactor(float new_grow) {
203     float shrink, grow;
204     rep.getResizingParameters(shrink, grow);
205     rep.setResizingParameters(shrink, new_grow);
206   }
207 
208   /// These aren't tr1 methods but perhaps ought to be.
209   float minLoadFactor() const {
210     float shrink, grow;
211     rep.getResizingParameters(shrink, grow);
212     return shrink;
213   }
214 
215   void minLoadFactor(float new_shrink) {
216     float shrink, grow;
217     rep.getResizingParameters(shrink, grow);
218     rep.setResizingParameters(new_shrink, grow);
219   }
220 
221   /// Comparison functions.
222   override
223   bool opEquals(this ThisT)(Object o) {
224     ThisT hs = cast(ThisT) o;
225     if (hs is null) return false;
226     return rep == hs.rep;
227   }
228 
229 }
230 
231 void swap(Value, HashFcn)(DenseHashSet!(Value, HashFcn) hs1, DenseHashSet!(Value, HashFcn) hs2) {
232   hs1.swap(hs2);
233 }