Groonga 3.0.9 Source Code Document
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
key-cursor.cpp
Go to the documentation of this file.
1 /* -*- c-basic-offset: 2 -*- */
2 /* Copyright(C) 2011 Brazil
3 
4  This library is free software; you can redistribute it and/or
5  modify it under the terms of the GNU Lesser General Public
6  License version 2.1 as published by the Free Software Foundation.
7 
8  This library is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  Lesser General Public License for more details.
12 
13  You should have received a copy of the GNU Lesser General Public
14  License along with this library; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17 
18 #include "key-cursor.hpp"
19 
20 #include <algorithm>
21 #include <cstring>
22 
23 #include "trie.hpp"
24 
25 namespace grn {
26 namespace dat {
27 
29  : trie_(NULL),
30  offset_(0),
31  limit_(MAX_UINT32),
32  flags_(KEY_RANGE_CURSOR),
33  buf_(),
34  count_(0),
35  max_count_(0),
36  finished_(false),
37  end_buf_(NULL),
38  end_str_() {}
39 
41  if (end_buf_ != NULL) {
42  delete [] end_buf_;
43  }
44 }
45 
46 void KeyCursor::open(const Trie &trie,
47  const String &min_str,
48  const String &max_str,
49  UInt32 offset,
50  UInt32 limit,
51  UInt32 flags) {
53  (min_str.ptr() == NULL) && (min_str.length() != 0));
55  (max_str.ptr() == NULL) && (max_str.length() != 0));
56 
57  flags = fix_flags(flags);
58  KeyCursor new_cursor(trie, offset, limit, flags);
59  new_cursor.init(min_str, max_str);
60  new_cursor.swap(this);
61 }
62 
64  KeyCursor new_cursor;
65  new_cursor.swap(this);
66 }
67 
68 const Key &KeyCursor::next() {
69  if (finished_ || (count_ >= max_count_)) {
70  return Key::invalid_key();
71  }
72 
73  if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
74  return ascending_next();
75  } else {
76  return descending_next();
77  }
78 }
79 
80 KeyCursor::KeyCursor(const Trie &trie,
81  UInt32 offset, UInt32 limit, UInt32 flags)
82  : trie_(&trie),
83  offset_(offset),
84  limit_(limit),
85  flags_(flags),
86  buf_(),
87  count_(0),
88  max_count_(0),
89  finished_(false),
90  end_buf_(NULL),
91  end_str_() {}
92 
93 UInt32 KeyCursor::fix_flags(UInt32 flags) const {
94  const UInt32 cursor_type = flags & CURSOR_TYPE_MASK;
95  GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) &&
96  (cursor_type != KEY_RANGE_CURSOR));
97  flags |= KEY_RANGE_CURSOR;
98 
99  const UInt32 cursor_order = flags & CURSOR_ORDER_MASK;
100  GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) &&
101  (cursor_order != ASCENDING_CURSOR) &&
102  (cursor_order != DESCENDING_CURSOR));
103  if (cursor_order == 0) {
104  flags |= ASCENDING_CURSOR;
105  }
106 
107  const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK;
109  cursor_options & ~(EXCEPT_LOWER_BOUND | EXCEPT_UPPER_BOUND));
110 
111  return flags;
112 }
113 
114 void KeyCursor::init(const String &min_str, const String &max_str) {
115  if (offset_ > (MAX_UINT32 - limit_)) {
116  max_count_ = MAX_UINT32;
117  } else {
118  max_count_ = offset_ + limit_;
119  }
120 
121  if (limit_ == 0) {
122  return;
123  }
124 
125  if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
126  ascending_init(min_str, max_str);
127  } else {
128  descending_init(min_str, max_str);
129  }
130 }
131 
132 void KeyCursor::ascending_init(const String &min_str, const String &max_str) {
133  if (max_str.ptr() != NULL) {
134  if (max_str.length() != 0) {
135  end_buf_ = new UInt8[max_str.length()];
136  std::memcpy(end_buf_, max_str.ptr(), max_str.length());
137  end_str_.assign(end_buf_, max_str.length());
138  }
139  }
140 
141  if ((min_str.ptr() == NULL) || (min_str.length() == 0)) {
142  buf_.push_back(ROOT_NODE_ID);
143  return;
144  }
145 
146  UInt32 node_id = ROOT_NODE_ID;
147  Node node;
148  for (UInt32 i = 0; i < min_str.length(); ++i) {
149  node = trie_->ith_node(node_id);
150  if (node.is_linker()) {
151  const Key &key = trie_->get_key(node.key_pos());
152  const int result = key.str().compare(min_str, i);
153  if ((result > 0) || ((result == 0) &&
154  ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND))) {
155  buf_.push_back(node_id);
156  } else if (node.sibling() != INVALID_LABEL) {
157  buf_.push_back(node_id ^ node.label() ^ node.sibling());
158  }
159  return;
160  } else if (node.sibling() != INVALID_LABEL) {
161  buf_.push_back(node_id ^ node.label() ^ node.sibling());
162  }
163 
164  node_id = node.offset() ^ min_str[i];
165  if (trie_->ith_node(node_id).label() != min_str[i]) {
166  UInt16 label = node.child();
167  if (label == TERMINAL_LABEL) {
168  label = trie_->ith_node(node.offset() ^ label).sibling();
169  }
170  while (label != INVALID_LABEL) {
171  if (label > min_str[i]) {
172  buf_.push_back(node.offset() ^ label);
173  break;
174  }
175  label = trie_->ith_node(node.offset() ^ label).sibling();
176  }
177  return;
178  }
179  }
180 
181  node = trie_->ith_node(node_id);
182  if (node.is_linker()) {
183  const Key &key = trie_->get_key(node.key_pos());
184  if ((key.length() != min_str.length()) ||
185  ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND)) {
186  buf_.push_back(node_id);
187  } else if (node.sibling() != INVALID_LABEL) {
188  buf_.push_back(node_id ^ node.label() ^ node.sibling());
189  }
190  return;
191  } else if (node.sibling() != INVALID_LABEL) {
192  buf_.push_back(node_id ^ node.label() ^ node.sibling());
193  }
194 
195  UInt16 label = node.child();
196  if ((label == TERMINAL_LABEL) &&
197  ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND)) {
198  label = trie_->ith_node(node.offset() ^ label).sibling();
199  }
200  if (label != INVALID_LABEL) {
201  buf_.push_back(node.offset() ^ label);
202  }
203 }
204 
205 void KeyCursor::descending_init(const String &min_str, const String &max_str) {
206  if (min_str.ptr() != NULL) {
207  if (min_str.length() != 0) {
208  end_buf_ = new UInt8[min_str.length()];
209  std::memcpy(end_buf_, min_str.ptr(), min_str.length());
210  end_str_.assign(end_buf_, min_str.length());
211  }
212  }
213 
214  if ((max_str.ptr() == NULL) || (max_str.length() == 0)) {
215  buf_.push_back(ROOT_NODE_ID);
216  return;
217  }
218 
219  UInt32 node_id = ROOT_NODE_ID;
220  for (UInt32 i = 0; i < max_str.length(); ++i) {
221  const Base base = trie_->ith_node(node_id).base();
222  if (base.is_linker()) {
223  const Key &key = trie_->get_key(base.key_pos());
224  const int result = key.str().compare(max_str, i);
225  if ((result < 0) || ((result == 0) &&
226  ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND))) {
227  buf_.push_back(node_id | POST_ORDER_FLAG);
228  }
229  return;
230  }
231 
232  UInt32 label = trie_->ith_node(node_id).child();
233  if (label == TERMINAL_LABEL) {
234  node_id = base.offset() ^ label;
235  buf_.push_back(node_id | POST_ORDER_FLAG);
236  label = trie_->ith_node(node_id).sibling();
237  }
238  while (label != INVALID_LABEL) {
239  node_id = base.offset() ^ label;
240  if (label < max_str[i]) {
241  buf_.push_back(node_id);
242  } else if (label > max_str[i]) {
243  return;
244  } else {
245  break;
246  }
247  label = trie_->ith_node(node_id).sibling();
248  }
249  if (label == INVALID_LABEL) {
250  return;
251  }
252  }
253 
254  const Base base = trie_->ith_node(node_id).base();
255  if (base.is_linker()) {
256  const Key &key = trie_->get_key(base.key_pos());
257  if ((key.length() == max_str.length()) &&
258  ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) {
259  buf_.push_back(node_id | POST_ORDER_FLAG);
260  }
261  return;
262  }
263 
264  UInt16 label = trie_->ith_node(node_id).child();
265  if ((label == TERMINAL_LABEL) &&
266  ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) {
267  buf_.push_back((base.offset() ^ label) | POST_ORDER_FLAG);
268  }
269 }
270 
271 void KeyCursor::swap(KeyCursor *cursor) {
272  std::swap(trie_, cursor->trie_);
273  std::swap(offset_, cursor->offset_);
274  std::swap(limit_, cursor->limit_);
275  std::swap(flags_, cursor->flags_);
276  buf_.swap(&cursor->buf_);
277  std::swap(count_, cursor->count_);
278  std::swap(max_count_, cursor->max_count_);
279  std::swap(finished_, cursor->finished_);
280  std::swap(end_buf_, cursor->end_buf_);
281  end_str_.swap(&cursor->end_str_);
282 }
283 
284 const Key &KeyCursor::ascending_next() {
285  while (!buf_.empty()) {
286  const UInt32 node_id = buf_.back();
287  buf_.pop_back();
288 
289  const Node node = trie_->ith_node(node_id);
290  if (node.sibling() != INVALID_LABEL) {
291  buf_.push_back(node_id ^ node.label() ^ node.sibling());
292  }
293 
294  if (node.is_linker()) {
295  const Key &key = trie_->get_key(node.key_pos());
296  if (end_buf_ != NULL) {
297  const int result = key.str().compare(end_str_);
298  if ((result > 0) || ((result == 0) &&
299  ((flags_ & EXCEPT_UPPER_BOUND) == EXCEPT_UPPER_BOUND))) {
300  finished_ = true;
301  return Key::invalid_key();
302  }
303  }
304  if (count_++ >= offset_) {
305  return key;
306  }
307  } else if (node.child() != INVALID_LABEL) {
308  buf_.push_back(node.offset() ^ node.child());
309  }
310  }
311  return Key::invalid_key();
312 }
313 
314 const Key &KeyCursor::descending_next() {
315  while (!buf_.empty()) {
316  const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG;
317  const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG;
318 
319  const Base base = trie_->ith_node(node_id).base();
320  if (post_order) {
321  buf_.pop_back();
322  if (base.is_linker()) {
323  const Key &key = trie_->get_key(base.key_pos());
324  if (end_buf_ != NULL) {
325  const int result = key.str().compare(end_str_);
326  if ((result < 0) || ((result == 0) &&
327  ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND))) {
328  finished_ = true;
329  return Key::invalid_key();
330  }
331  }
332  if (count_++ >= offset_) {
333  return key;
334  }
335  }
336  } else {
337  buf_.back() |= POST_ORDER_FLAG;
338  UInt16 label = trie_->ith_node(node_id).child();
339  while (label != INVALID_LABEL) {
340  buf_.push_back(base.offset() ^ label);
341  label = trie_->ith_node(base.offset() ^ label).sibling();
342  }
343  }
344  }
345  return Key::invalid_key();
346 }
347 
348 } // namespace dat
349 } // namespace grn