29 class StatusFlagManager {
31 StatusFlagManager(Header *header,
UInt32 status_flag)
35 ~StatusFlagManager() {
44 StatusFlagManager(
const StatusFlagManager &);
45 StatusFlagManager &operator=(
const StatusFlagManager &);
63 double num_nodes_per_key,
64 double average_key_length) {
67 if (num_nodes_per_key < 1.0) {
72 if (average_key_length < 1.0) {
78 if (max_num_keys == 0) {
90 new_trie.create_file(file_name, file_size, max_num_keys,
91 num_nodes_per_key, average_key_length);
96 const char *file_name,
99 double num_nodes_per_key,
100 double average_key_length) {
103 if (num_nodes_per_key < 1.0) {
112 if (average_key_length < 1.0) {
122 if (max_num_keys == 0) {
123 if (file_size == 0) {
136 new_trie.create_file(file_name, file_size, max_num_keys,
137 num_nodes_per_key, average_key_length);
138 new_trie.build_from_trie(trie);
146 new_trie.repair_trie(trie);
154 new_trie.open_file(file_name);
163 file_.
swap(&trie->file_);
165 nodes_.
swap(&trie->nodes_);
166 blocks_.
swap(&trie->blocks_);
167 entries_.
swap(&trie->entries_);
168 key_buf_.
swap(&trie->key_buf_);
171 void Trie::create_file(
const char *file_name,
174 double num_nodes_per_key,
175 double average_key_length) {
178 if (max_num_keys == 0) {
180 const double num_bytes_per_key = (
sizeof(
Node) * num_nodes_per_key)
183 +
sizeof(
UInt32) +
sizeof(
UInt8) + average_key_length + 1.5;
187 max_num_keys = (
UInt32)(avail / num_bytes_per_key);
202 if (file_size == 0) {
209 const UInt64 total_num_bytes = (
UInt64)(total_key_length)
211 + (
UInt32)(max_num_keys * 1.5);
214 key_buf_size = (
UInt32)(total_num_bytes /
sizeof(
UInt32));
216 file_size =
sizeof(Header)
222 const UInt64 avail = file_size -
sizeof(Header)
230 create_file(file_name, file_size, max_num_keys,
231 max_num_blocks, key_buf_size);
234 void Trie::create_file(
const char *file_name,
240 + (
sizeof(Block) * max_num_blocks)
241 + (
sizeof(Node) *
BLOCK_SIZE * max_num_blocks)
242 + (
sizeof(Entry) * max_num_keys)
243 + (
sizeof(
UInt32) * key_buf_size)));
245 file_.
create(file_name, file_size);
247 Header *
const header =
static_cast<Header *
>(file_.
ptr());
249 header->set_file_size(file_size);
250 header->set_max_num_keys(max_num_keys);
251 header->set_max_num_blocks(max_num_blocks);
252 header->set_key_buf_size(key_buf_size);
254 map_address(file_.
ptr());
260 void Trie::open_file(
const char *file_name) {
263 file_.
open(file_name);
264 map_address(file_.
ptr());
268 void Trie::map_address(
void *address) {
271 header_ =
static_cast<Header *
>(address);
274 entries_.
assign(reinterpret_cast<Entry *>(blocks_.
end()) - 1,
279 > static_cast<void *>(static_cast<char *>(address) +
file_size()));
282 void Trie::build_from_trie(
const Trie &trie) {
296 void Trie::build_from_trie(
const Trie &trie,
UInt32 src,
UInt32 dest) {
298 if (trie.ith_node(src).is_linker()) {
299 const Key &key = trie.get_key(trie.ith_node(src).key_pos());
301 key.id(), key.ptr(), key.length());
309 const UInt32 src_offset = trie.ith_node(src).offset();
315 UInt32 label = trie.ith_node(src).child();
318 const UInt32 child = src_offset ^ label;
319 if (trie.ith_node(child).is_linker() ||
321 labels[num_labels++] = label;
323 label = trie.ith_node(child).sibling();
325 if (num_labels == 0) {
329 dest_offset = find_offset(labels, num_labels);
330 for (
UInt32 i = 0;
i < num_labels; ++
i) {
331 const UInt32 child = dest_offset ^ labels[
i];
334 if ((i + 1) < num_labels) {
347 build_from_trie(trie, src_offset ^ label, dest_offset ^ label);
352 void Trie::repair_trie(
const Trie &trie) {
353 Vector<UInt32> valid_ids;
358 const Entry &
entry = trie.ith_entry(i);
359 if (entry.is_valid()) {
360 valid_ids.push_back(i);
362 const Key &key = trie.get_key(entry.key_pos());
364 key.id(), key.ptr(), key.length());
377 prev_invalid_key_id =
i;
383 mkq_sort(valid_ids.begin(), valid_ids.end(), 0);
384 build_from_keys(valid_ids.begin(), valid_ids.end(), 0,
ROOT_NODE_ID);
387 void Trie::build_from_keys(
const UInt32 *begin,
const UInt32 *end,
389 if ((end - begin) == 1) {
406 for (++it; it < end; ++it) {
408 if ((
UInt8)key[depth] != labels[num_labels - 1]) {
409 labels[num_labels++] = (
UInt8)key[depth];
414 offset = find_offset(labels, num_labels);
416 for (
UInt32 i = 0; i < num_labels; ++
i) {
417 const UInt32 next = offset ^ labels[
i];
431 build_from_keys(begin, begin + 1, depth + 1, offset ^
TERMINAL_LABEL);
436 for (
const UInt32 *it = begin + 1; it < end; ++it) {
438 if ((
UInt8)key[depth] != label) {
439 build_from_keys(begin, it, depth + 1, offset ^ label);
440 label = (
UInt8)key[depth];
444 build_from_keys(begin, end, depth + 1, offset ^ label);
454 const int pivot = get_median(*l, *(l + (r - l) / 2), *(r - 1), depth);
457 const int label = get_label(*pl, depth);
460 }
else if (label == pivot) {
461 swap_ids(pl, pivot_l);
467 const int label = get_label(*--pr, depth);
470 }
else if (label == pivot) {
471 swap_ids(pr, --pivot_r);
480 while (pivot_l > l) {
481 swap_ids(--pivot_l, --pl);
483 while (pivot_r < r) {
484 swap_ids(pivot_r, pr);
489 if (((pl - l) > (pr - pl)) || ((r - pr) > (pr - pl))) {
491 mkq_sort(pl, pr, depth + 1);
494 if ((pl - l) < (r - pr)) {
496 mkq_sort(l, pl, depth);
501 mkq_sort(pr, r, depth);
507 mkq_sort(l, pl, depth);
511 mkq_sort(pr, r, depth);
522 insertion_sort(l, r, depth);
527 for (
UInt32 *i = l + 1; i < r; ++
i) {
528 for (
UInt32 *j = i; j > l; --j) {
529 if (less_than(*(j - 1), *j, depth)) {
538 const int x = get_label(a, depth);
539 const int y = get_label(b, depth);
540 const int z = get_label(c, depth);
556 int Trie::get_label(
UInt32 key_id,
UInt32 depth)
const {
557 const Key &key =
ith_key(key_id);
558 if (depth == key.length()) {
561 return (
UInt8)key[depth];
566 const Key &lhs_key =
ith_key(lhs);
567 const Key &rhs_key =
ith_key(rhs);
568 const UInt32 length = (lhs_key.length() < rhs_key.length()) ?
569 lhs_key.length() : rhs_key.length();
570 for (
UInt32 i = depth; i < length; ++
i) {
571 if (lhs_key[i] != rhs_key[i]) {
575 return lhs_key.length() < rhs_key.length();
589 if (!search_linker(ptr, length, node_id, query_pos)) {
594 if (!base.is_linker()) {
598 if (
get_key(base.key_pos()).equals_to(ptr, length, query_pos)) {
599 if (key_pos != NULL) {
607 bool Trie::search_linker(
const UInt8 *ptr,
UInt32 length,
611 for ( ; query_pos < length; ++query_pos) {
613 if (base.is_linker()) {
617 const UInt32 next = base.offset() ^ ptr[query_pos];
618 if (
ith_node(next).label() != ptr[query_pos]) {
625 if (base.is_linker()) {
637 bool Trie::lcp_search_key(
const UInt8 *ptr,
UInt32 length,
645 for ( ; query_pos < length; ++query_pos) {
647 if (base.is_linker()) {
648 const Key &key =
get_key(base.key_pos());
649 if ((key.length() <= length) &&
650 key.equals_to(ptr, key.length(), query_pos)) {
651 if (key_pos != NULL) {
652 *key_pos = base.key_pos();
660 const Base linker_base =
662 if (linker_base.is_linker()) {
663 if (key_pos != NULL) {
664 *key_pos = linker_base.key_pos();
670 node_id = base.offset() ^ ptr[query_pos];
671 if (
ith_node(node_id).label() != ptr[query_pos]) {
677 if (base.is_linker()) {
678 const Key &key =
get_key(base.key_pos());
679 if (key.length() <= length) {
680 if (key_pos != NULL) {
681 *key_pos = base.key_pos();
687 if (linker_base.is_linker()) {
688 if (key_pos != NULL) {
689 *key_pos = linker_base.key_pos();
697 bool Trie::remove_key(
const UInt8 *ptr,
UInt32 length) {
699 StatusFlagManager status_flag_manager(header_,
REMOVING_FLAG);
705 if (!search_linker(ptr, length, node_id, query_pos)) {
710 if (!
get_key(key_pos).equals_to(ptr, length, query_pos)) {
733 search_linker(ptr, length, node_id, query_pos);
734 if (!insert_linker(ptr, length, node_id, query_pos)) {
735 if (key_pos != NULL) {
742 const UInt32 new_key_pos = append_key(ptr, length, new_key_id);
755 if (key_pos != NULL) {
756 *key_pos = new_key_pos;
761 bool Trie::insert_linker(
const UInt8 *ptr,
UInt32 length,
763 if (
ith_node(node_id).is_linker()) {
766 while ((i < length) && (i < key.length())) {
767 if (ptr[i] != key[i]) {
772 if ((i == length) && (i == key.length())) {
778 for (
UInt32 j = query_pos; j <
i; ++j) {
779 node_id = insert_node(node_id, ptr[j]);
781 node_id = separate(ptr, length, node_id, i);
787 const UInt16 label = (query_pos < length) ?
791 !
ith_node(base.offset() ^ label).is_phantom()) {
792 resolve(node_id, label);
794 node_id = insert_node(node_id, label);
799 bool Trie::update_key(
const Key &key,
const UInt8 *ptr,
UInt32 length,
802 StatusFlagManager status_flag_manager(header_,
UPDATING_FLAG);
806 if (!key.is_valid()) {
813 search_linker(ptr, length, node_id, query_pos);
814 if (!insert_linker(ptr, length, node_id, query_pos)) {
815 if (key_pos != NULL) {
821 const UInt32 new_key_pos = append_key(ptr, length, key.id());
823 ith_entry(key.id()).set_key_pos(new_key_pos);
825 if (key_pos != NULL) {
826 *key_pos = new_key_pos;
832 !search_linker(static_cast<const UInt8 *>(key.ptr()), key.length(),
833 node_id, query_pos));
845 offset = find_offset(&label, 1);
847 offset = base.offset();
850 const UInt32 next = offset ^ label;
854 if (base.is_linker()) {
877 UInt32 prev = offset ^ child_label;
880 while (label > sibling_label) {
881 prev = offset ^ sibling_label;
912 const Key &key =
get_key(key_pos);
919 const UInt32 offset = find_offset(labels, 2);
921 UInt32 next = offset ^ labels[0];
928 next = offset ^ labels[1];
961 labels[num_labels++] = next_label;
966 labels[num_labels] = label;
967 offset = find_offset(labels, num_labels + 1);
968 migrate_nodes(node_id, offset, labels, num_labels);
970 offset = find_offset(&label, 1);
980 void Trie::migrate_nodes(
UInt32 node_id,
UInt32 dest_offset,
992 for (
UInt32 i = 0; i < num_labels; ++
i) {
993 const UInt32 src_node_id = src_offset ^ labels[
i];
994 const UInt32 dest_node_id = dest_offset ^ labels[
i];
998 reserve_node(dest_node_id);
1000 ith_node(src_node_id).except_is_offset());
1018 while (num_labels >= (1U << level)) {
1031 UInt32 block_id = leader;
1033 const Block &block =
ith_block(block_id);
1040 const UInt32 offset = node_id ^ labels[0];
1041 if (!
ith_node(offset).is_offset()) {
1043 for ( ; i < num_labels; ++
i) {
1044 if (!
ith_node(offset ^ labels[i]).is_phantom()) {
1048 if (i >= num_labels) {
1053 }
while (node_id != first);
1055 const UInt32 prev = block_id;
1056 const UInt32 next = block.next();
1063 update_block_level(prev, level + 1);
1064 if (next == leader) {
1078 void Trie::reserve_node(
UInt32 node_id) {
1096 if ((node_id &
BLOCK_MASK) == block.first_phantom()) {
1099 block.set_first_phantom(next & BLOCK_MASK);
1107 if (block.num_phantoms() == threshold) {
1108 update_block_level(block_id, block.level() + 1);
1111 block.set_num_phantoms(block.num_phantoms() - 1);
1113 node.set_is_phantom(
false);
1121 void Trie::reserve_block(
UInt32 block_id) {
1138 check.set_is_phantom(
true);
1140 for (
UInt32 i = begin; i < end; ++
i) {
1141 check.set_prev((i - 1) & BLOCK_MASK);
1142 check.set_next((i + 1) & BLOCK_MASK);
1148 set_block_level(block_id, 0);
1152 void Trie::update_block_level(
UInt32 block_id,
UInt32 level) {
1156 unset_block_level(block_id);
1157 set_block_level(block_id, level);
1160 void Trie::set_block_level(
UInt32 block_id,
UInt32 level) {
1172 const UInt32 next = leader;
1185 void Trie::unset_block_level(
UInt32 block_id) {
1199 if (next == block_id) {
1205 if (block_id == leader) {