1 // Copyright 2016 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS-IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 16 // Original Author: ericv@google.com (Eric Veach) 17 // Converted to D: madric@gmail.com (Vijay Nayar) 18 19 module s2.value_lexicon; 20 21 import s2.util.container.dense_hash_set; 22 23 import std.range; 24 25 /** 26 * ValueLexicon is a class that maps distinct values to sequentially numbered 27 * integer identifiers. It automatically eliminates duplicates and uses a 28 * compact representation. See also SequenceLexicon. 29 * 30 * Each distinct value is mapped to a 32-bit integer. The space used for each 31 * value is approximately 7 bytes plus the space needed for the value itself. 32 * For example, int64 values would need approximately 15 bytes each. Note 33 * also that values are referred to using 32-bit ids rather than 64-bit 34 * pointers. 35 * 36 * This class has the same thread-safety properties as "string": const methods 37 * are thread safe, and non-const methods are not thread safe. 38 * 39 * Example usage: 40 * 41 * ValueLexicon<string> lexicon; 42 * uint32 cat_id = lexicon.Add("cat"); 43 * EXPECT_EQ(cat_id, lexicon.Add("cat")); 44 * EXPECT_EQ("cat", lexicon.value(cat_id)); 45 * 46 */ 47 class ValueLexicon(T) { 48 public: 49 this() { 50 // Simply use the defaults used in the DenseHashSet. 51 _hasher = &hash!T; 52 _keyEqual = &equalTo!T; 53 _idSet = new IdSet(0, new IdHasher(), new IdKeyEqual()); 54 _idSet.setEmptyKey(EMPTY_KEY); 55 } 56 57 this(ValueLexicon!T x) { 58 _hasher = &hash!T; 59 _keyEqual = &equalTo!T; 60 _values = x._values.dup; 61 _idSet = new IdSet( 62 x._idSet.begin(), x._idSet.end(), EMPTY_KEY, 0, new IdHasher(), new IdKeyEqual()); 63 } 64 65 /// Clears all data from the lexicon. 66 void clear() { 67 _values.length = 0; 68 _idSet.clear(); 69 } 70 71 /// Add the given value to the lexicon if it is not already present, and 72 /// return its integer id. Ids are assigned sequentially starting from zero. 73 uint add(in T value) { 74 if (!_values.empty() && _keyEqual(value, _values.back())) { 75 return cast(uint) _values.length - 1; 76 } 77 _values ~= value; 78 auto result = _idSet.insert(cast(uint) _values.length - 1); 79 if (result.second) { 80 return cast(uint) _values.length - 1; 81 } else { 82 _values.popBack(); 83 return *(result.first); 84 } 85 } 86 87 /// Return the number of values in the lexicon. 88 uint size() const { 89 return cast(uint) _values.length; 90 } 91 92 /// Return the value with the given id. 93 const(T) value(uint id) const { 94 return _values[id]; 95 } 96 97 private: 98 // Choose kEmptyKey to be the last key that will ever be generated. 99 enum int EMPTY_KEY = int.max; 100 101 class IdHasher { 102 public: 103 this() { } 104 size_t opCall()(uint id) const { 105 return _hasher(value(id)); 106 } 107 } 108 109 class IdKeyEqual { 110 public: 111 this() { } 112 bool opCall(in uint id1, in uint id2) const { 113 if (id1 == id2) return true; 114 if (id1 == EMPTY_KEY || id2 == EMPTY_KEY) { 115 return false; 116 } 117 return _keyEqual(value(id1), value(id2)); 118 } 119 } 120 121 alias IdSet = DenseHashSet!(uint, IdHasher, IdKeyEqual); 122 123 alias Hasher = size_t function(in T); 124 alias KeyEqual = bool function(in T, in T); 125 126 Hasher _hasher; 127 KeyEqual _keyEqual; 128 T[] _values; 129 IdSet _idSet; 130 }