Groonga 3.0.9 Source Code Document
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
trie.hpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 2 -*- */
2 /* Copyright(C) 2011 Brazil
3 
4  This library is free software; you can redistribute it and/or
5  modify it under the terms of the GNU Lesser General Public
6  License version 2.1 as published by the Free Software Foundation.
7 
8  This library is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  Lesser General Public License for more details.
12 
13  You should have received a copy of the GNU Lesser General Public
14  License along with this library; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17 
18 #ifndef GRN_DAT_TRIE_HPP_
19 #define GRN_DAT_TRIE_HPP_
20 
21 #include "array.hpp"
22 #include "header.hpp"
23 #include "node.hpp"
24 #include "block.hpp"
25 #include "entry.hpp"
26 #include "key.hpp"
27 #include "file.hpp"
28 
29 namespace grn {
30 namespace dat {
31 
33  public:
34  Trie();
35  ~Trie();
36 
37  void create(const char *file_name = NULL,
38  UInt64 file_size = 0,
39  UInt32 max_num_keys = 0,
40  double num_nodes_per_key = 0.0,
41  double average_key_length = 0.0);
42 
43  void create(const Trie &trie,
44  const char *file_name = NULL,
45  UInt64 file_size = 0,
46  UInt32 max_num_keys = 0,
47  double num_nodes_per_key = 0.0,
48  double average_key_length = 0.0);
49 
50  void repair(const Trie &trie, const char *file_name = NULL);
51 
52  void open(const char *file_name);
53  void close();
54 
55  void swap(Trie *trie);
56 
57  // Users can access a key by its position or ID.
58  const Key &get_key(UInt32 key_pos) const {
59  GRN_DAT_DEBUG_THROW_IF(key_pos >= next_key_pos());
60  return *reinterpret_cast<const Key *>(key_buf_.ptr() + key_pos);
61  }
62  // If a specified ID is invalid, e.g. the key is already deleted, this
63  // function returns a reference to an invalid key object whose id() returns
64  // INVALID_KEY_ID.
65  const Key &ith_key(UInt32 key_id) const {
66  if ((key_id >= min_key_id()) && (key_id <= max_key_id()) &&
67  ith_entry(key_id).is_valid()) {
68  return get_key(ith_entry(key_id).key_pos());
69  }
70  return Key::invalid_key();
71  }
72 
73  bool search(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) const {
74  return search_key(static_cast<const UInt8 *>(ptr), length, key_pos);
75  }
76  // Longest prefix match search.
77  bool lcp_search(const void *ptr, UInt32 length,
78  UInt32 *key_pos = NULL) const {
79  return lcp_search_key(static_cast<const UInt8 *>(ptr), length, key_pos);
80  }
81 
82  bool remove(UInt32 key_id) {
83  const Key &key = ith_key(key_id);
84  if (key.is_valid()) {
85  return remove(key.ptr(), key.length());
86  }
87  return false;
88  }
89  bool remove(const void *ptr, UInt32 length) {
90  return remove_key(static_cast<const UInt8 *>(ptr), length);
91  }
92 
93  bool insert(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) {
94  return insert_key(static_cast<const UInt8 *>(ptr), length, key_pos);
95  }
96 
97  bool update(UInt32 key_id, const void *ptr, UInt32 length,
98  UInt32 *key_pos = NULL) {
99  return update_key(ith_key(key_id), static_cast<const UInt8 *>(ptr),
100  length, key_pos);
101  }
102  bool update(const void *src_ptr, UInt32 src_length,
103  const void *dest_ptr, UInt32 dest_length,
104  UInt32 *key_pos = NULL) {
105  UInt32 src_key_pos;
106  if (!search(src_ptr, src_length, &src_key_pos)) {
107  return false;
108  }
109  const Key &src_key = get_key(src_key_pos);
110  return update_key(src_key, static_cast<const UInt8 *>(dest_ptr),
111  dest_length, key_pos);
112  }
113 
114  const Node &ith_node(UInt32 i) const {
115  GRN_DAT_DEBUG_THROW_IF(i >= num_nodes());
116  return nodes_[i];
117  }
118  const Block &ith_block(UInt32 i) const {
119  GRN_DAT_DEBUG_THROW_IF(i >= num_blocks());
120  return blocks_[i];
121  }
122  const Entry &ith_entry(UInt32 i) const {
123  GRN_DAT_DEBUG_THROW_IF(i < min_key_id());
124  GRN_DAT_DEBUG_THROW_IF(i > max_key_id());
125  return entries_[i];
126  }
127 
128  const Header &header() const {
129  return *header_;
130  }
131 
132  UInt64 file_size() const {
133  return header_->file_size();
134  }
136  return sizeof(Header)
137  + (sizeof(Entry) * num_keys())
138  + (sizeof(Block) * num_blocks())
139  + (sizeof(Node) * num_nodes())
140  + total_key_length();
141  }
143  return header_->total_key_length();
144  }
145  UInt32 num_keys() const {
146  return header_->num_keys();
147  }
148  UInt32 min_key_id() const {
149  return header_->min_key_id();
150  }
151  UInt32 next_key_id() const {
152  return header_->next_key_id();
153  }
154  UInt32 max_key_id() const {
155  return header_->max_key_id();
156  }
158  return header_->max_num_keys();
159  }
160  UInt32 num_nodes() const {
161  return header_->num_nodes();
162  }
164  return header_->num_phantoms();
165  }
166  UInt32 num_zombies() const {
167  return header_->num_zombies();
168  }
170  return header_->max_num_nodes();
171  }
172  UInt32 num_blocks() const {
173  return header_->num_blocks();
174  }
176  return header_->max_num_blocks();
177  }
179  return header_->next_key_pos();
180  }
182  return header_->key_buf_size();
183  }
185  return header_->status_flags();
186  }
187 
189  header_->set_status_flags(status_flags() & ~CHANGING_MASK);
190  }
191 
192  private:
193  File file_;
194  Header *header_;
195  Array<Node> nodes_;
196  Array<Block> blocks_;
197  Array<Entry> entries_;
198  Array<UInt32> key_buf_;
199 
200  void create_file(const char *file_name,
201  UInt64 file_size,
202  UInt32 max_num_keys,
203  double num_nodes_per_key,
204  double average_key_length);
205  void create_file(const char *file_name,
206  UInt64 file_size,
207  UInt32 max_num_keys,
208  UInt32 max_num_blocks,
209  UInt32 key_buf_size);
210 
211  void open_file(const char *file_name);
212 
213  void map_address(void *address);
214 
215  void build_from_trie(const Trie &trie);
216  void build_from_trie(const Trie &trie, UInt32 src, UInt32 dest);
217 
218  void repair_trie(const Trie &trie);
219  void build_from_keys(const UInt32 *begin, const UInt32 *end,
220  UInt32 depth, UInt32 node_id);
221 
222  void mkq_sort(UInt32 *l, UInt32 *r, UInt32 depth);
223  void insertion_sort(UInt32 *l, UInt32 *r, UInt32 depth);
224 
225  inline int get_label(UInt32 key_id, UInt32 depth) const;
226  inline int get_median(UInt32 a, UInt32 b, UInt32 c, UInt32 depth) const;
227  inline bool less_than(UInt32 lhs, UInt32 rhs, UInt32 depth) const;
228  inline static void swap_ids(UInt32 *lhs, UInt32 *rhs);
229 
230  bool search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const;
231  bool search_linker(const UInt8 *ptr, UInt32 length,
232  UInt32 &node_id, UInt32 &query_pos) const;
233 
234  bool lcp_search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const;
235 
236  bool remove_key(const UInt8 *ptr, UInt32 length);
237 
238  bool insert_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos);
239  bool insert_linker(const UInt8 *ptr, UInt32 length,
240  UInt32 &node_id, UInt32 query_pos);
241 
242  bool update_key(const Key &key, const UInt8 *ptr, UInt32 length,
243  UInt32 *key_pos);
244 
245  UInt32 insert_node(UInt32 node_id, UInt16 label);
246  UInt32 append_key(const UInt8 *ptr, UInt32 length, UInt32 key_id);
247 
248  UInt32 separate(const UInt8 *ptr, UInt32 length,
249  UInt32 node_id, UInt32 i);
250  void resolve(UInt32 node_id, UInt16 label);
251  void migrate_nodes(UInt32 node_id, UInt32 dest_offset,
252  const UInt16 *labels, UInt32 num_labels);
253 
254  UInt32 find_offset(const UInt16 *labels, UInt32 num_labels);
255 
256  void reserve_node(UInt32 node_id);
257  void reserve_block(UInt32 block_id);
258 
259  void update_block_level(UInt32 block_id, UInt32 level);
260  void set_block_level(UInt32 block_id, UInt32 level);
261  void unset_block_level(UInt32 block_id);
262 
263  Node &ith_node(UInt32 i) {
264  GRN_DAT_DEBUG_THROW_IF(i >= num_nodes());
265  return nodes_[i];
266  }
267  Block &ith_block(UInt32 i) {
268  GRN_DAT_DEBUG_THROW_IF(i >= num_blocks());
269  return blocks_[i];
270  }
271  Entry &ith_entry(UInt32 i) {
272  GRN_DAT_DEBUG_THROW_IF(i < min_key_id());
273  GRN_DAT_DEBUG_THROW_IF(i > (max_key_id() + 1));
274  return entries_[i];
275  }
276 
277  // Disallows copy and assignment.
278  Trie(const Trie &);
279  Trie &operator=(const Trie &);
280 };
281 
282 } // namespace dat
283 } // namespace grn
284 
285 #endif // GRN_DAT_TRIE_HPP_