1 // Copyright 2016 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS-IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 
16 // Original Author: ericv@google.com (Eric Veach)
17 // Converted to D:  madric@gmail.com (Vijay Nayar)
18 
19 module s2.value_lexicon;
20 
21 import s2.util.container.dense_hash_set;
22 
23 import std.range;
24 
25 /**
26  * ValueLexicon is a class that maps distinct values to sequentially numbered
27  * integer identifiers.  It automatically eliminates duplicates and uses a
28  * compact representation.  See also SequenceLexicon.
29  *
30  * Each distinct value is mapped to a 32-bit integer.  The space used for each
31  * value is approximately 7 bytes plus the space needed for the value itself.
32  * For example, int64 values would need approximately 15 bytes each.  Note
33  * also that values are referred to using 32-bit ids rather than 64-bit
34  * pointers.
35  *
36  * This class has the same thread-safety properties as "string": const methods
37  * are thread safe, and non-const methods are not thread safe.
38  *
39  * Example usage:
40  *
41  *   ValueLexicon<string> lexicon;
42  *   uint32 cat_id = lexicon.Add("cat");
43  *   EXPECT_EQ(cat_id, lexicon.Add("cat"));
44  *   EXPECT_EQ("cat", lexicon.value(cat_id));
45  *
46  */
47 class ValueLexicon(T) {
48 public:
49   this() {
50     // Simply use the defaults used in the DenseHashSet.
51     _hasher = &hash!T;
52     _keyEqual = &equalTo!T;
53     _idSet = new IdSet(0, new IdHasher(), new IdKeyEqual());
54     _idSet.setEmptyKey(EMPTY_KEY);
55   }
56 
57   this(ValueLexicon!T x) {
58     _hasher = &hash!T;
59     _keyEqual = &equalTo!T;
60     _values = x._values.dup;
61     _idSet = new IdSet(
62         x._idSet.begin(), x._idSet.end(), EMPTY_KEY, 0, new IdHasher(), new IdKeyEqual());
63   }
64 
65   /// Clears all data from the lexicon.
66   void clear() {
67     _values.length = 0;
68     _idSet.clear();
69   }
70 
71   /// Add the given value to the lexicon if it is not already present, and
72   /// return its integer id.  Ids are assigned sequentially starting from zero.
73   uint add(in T value) {
74     if (!_values.empty() && _keyEqual(value, _values.back())) {
75       return cast(uint) _values.length - 1;
76     }
77     _values ~= value;
78     auto result = _idSet.insert(cast(uint) _values.length - 1);
79     if (result.second) {
80       return cast(uint) _values.length - 1;
81     } else {
82       _values.popBack();
83       return *(result.first);
84     }
85   }
86 
87   /// Return the number of values in the lexicon.
88   uint size() const {
89     return cast(uint) _values.length;
90   }
91 
92   /// Return the value with the given id.
93   const(T) value(uint id) const {
94     return _values[id];
95   }
96 
97  private:
98   // Choose kEmptyKey to be the last key that will ever be generated.
99   enum int EMPTY_KEY = int.max;
100 
101   class IdHasher {
102   public:
103     this() { }
104     size_t opCall()(uint id) const {
105       return _hasher(value(id));
106     }
107   }
108 
109   class IdKeyEqual {
110   public:
111     this() { }
112     bool opCall(in uint id1, in uint id2) const {
113       if (id1 == id2) return true;
114       if (id1 == EMPTY_KEY || id2 == EMPTY_KEY) {
115         return false;
116       }
117       return _keyEqual(value(id1), value(id2));
118     }
119   }
120 
121   alias IdSet = DenseHashSet!(uint, IdHasher, IdKeyEqual);
122 
123   alias Hasher = size_t function(in T);
124   alias KeyEqual = bool function(in T, in T);
125 
126   Hasher _hasher;
127   KeyEqual _keyEqual;
128   T[] _values;
129   IdSet _idSet;
130 }