SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
wt_helper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4#ifndef INCLUDED_SDSL_WT_HELPER
5#define INCLUDED_SDSL_WT_HELPER
6
7#include <array>
8#include <deque>
9#include <istream>
10#include <stddef.h>
11#include <stdexcept>
12#include <stdint.h>
13#include <string>
14#include <utility>
15#include <vector>
16
17#include <sdsl/cereal.hpp>
18#include <sdsl/int_vector.hpp>
19#include <sdsl/io.hpp>
22#include <sdsl/util.hpp>
23
24namespace sdsl
25{
26
27typedef std::array<int_vector<>::size_type, 2> range_type;
28typedef std::vector<range_type> range_vec_type;
29
31
34bool empty(range_type const & r);
35
37
41
43
46template <typename t_it, typename t_rac>
47void calculate_character_occurences(t_it begin, t_it end, t_rac & C)
48{
49 C = t_rac();
50 for (auto it = begin; it != end; ++it)
51 {
52 uint64_t c = *it;
53 if (c >= C.size())
54 {
55 C.resize(c + 1, 0);
56 }
57 ++C[c];
58 }
59}
60
61template <typename t_rac, typename sigma_type>
62void calculate_effective_alphabet_size(t_rac const & C, sigma_type & sigma)
63{
64 sigma = std::count_if(begin(C),
65 end(C),
66 [](decltype(*begin(C)) & x)
67 {
68 return x > 0;
69 });
70}
71
72struct pc_node
73{
74 uint64_t freq; // frequency of symbol sym
75 uint64_t sym; // symbol
76 uint64_t parent; // pointer to the parent
77 uint64_t child[2]; // pointer to the children
78
79 enum : uint64_t
80 {
81 undef = 0xFFFFFFFFFFFFFFFFULL
82 }; // max uint64_t value
83
84 pc_node(uint64_t freq = 0,
85 uint64_t sym = 0,
86 uint64_t parent = undef,
87 uint64_t child_left = undef,
88 uint64_t child_right = undef);
89};
90
91template <typename t_tree_strat_fat>
92struct _node
93{
94 using node_type = typename t_tree_strat_fat::node_type;
95 typedef uint64_t size_type;
96 uint64_t bv_pos = 0; // pointer into the bit_vector, which represents the wavelet tree
97 uint64_t bv_pos_rank = 0; // pre-calculated rank for the prefix up to but not including bv_pos
98 node_type parent = t_tree_strat_fat::undef; // pointer to the parent
99 node_type child[2] = {t_tree_strat_fat::undef, t_tree_strat_fat::undef}; // pointer to the children
100
101 _node(uint64_t bv_pos = 0,
102 uint64_t bv_pos_rank = 0,
103 node_type parent = t_tree_strat_fat::undef,
104 node_type child_left = t_tree_strat_fat::undef,
105 node_type child_right = t_tree_strat_fat::undef) :
106 bv_pos(bv_pos),
109 {
110 child[0] = child_left;
111 child[1] = child_right;
112 }
113
114 _node(_node const &) = default;
115
116 _node & operator=(_node const & v)
117 {
118 if (this != &v)
119 {
120 bv_pos = v.bv_pos;
122 parent = v.parent;
123 child[0] = v.child[0];
124 child[1] = v.child[1];
125 }
126 return *this;
127 }
128
130 {
131 bv_pos = v.freq;
132 bv_pos_rank = v.sym;
133 parent = v.parent;
134 child[0] = v.child[0];
135 child[1] = v.child[1];
136 return *this;
137 }
138
139 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
140 {
141 structure_tree_node * st_child = structure_tree::add_child(v, name, util::class_name(*this));
142 uint64_t written_bytes = 0;
143 written_bytes += write_member(bv_pos, out);
144 written_bytes += write_member(bv_pos_rank, out);
145 written_bytes += write_member(parent, out);
146 out.write((char *)child, 2 * sizeof(child[0]));
147 written_bytes += 2 * sizeof(child[0]);
148 structure_tree::add_size(st_child, written_bytes);
149 return written_bytes;
150 }
151
152 void load(std::istream & in)
153 {
154 read_member(bv_pos, in);
156 read_member(parent, in);
157 in.read((char *)child, 2 * sizeof(child[0]));
158 }
159
160 template <typename archive_t>
161 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
162 {
163 ar(CEREAL_NVP(bv_pos));
165 ar(CEREAL_NVP(parent));
166 ar(CEREAL_NVP(child[0]));
167 ar(CEREAL_NVP(child[1]));
168 }
169
170 template <typename archive_t>
171 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
172 {
173 ar(CEREAL_NVP(bv_pos));
175 ar(CEREAL_NVP(parent));
176 ar(CEREAL_NVP(child[0]));
177 ar(CEREAL_NVP(child[1]));
178 }
179
181 bool operator==(_node const & other) const noexcept
182 {
183 return (bv_pos == other.bv_pos) && (bv_pos_rank == other.bv_pos_rank) && (parent == other.parent)
184 && (child[0] == other.child[0]) && (child[1] == other.child[1]);
185 }
186
188 bool operator!=(_node const & other) const noexcept
189 {
190 return !(*this == other);
191 }
192};
193
194// TODO: version of _byte_tree for lex_ordered tree shapes
195// m_c_to_leaf can be compressed and
196// m_path is only needed for sigma chars
197
198// Strategy class for tree representation of a WT
199template <bool t_dfs_shape, typename t_wt>
201{
203 using value_type = uint8_t;
204 using node_type = uint16_t; // node is represented by index in m_nodes
206 enum : uint16_t
207 {
208 undef = 0xFFFF
209 }; // max uint16_t value
210 enum : uint32_t
211 {
212 fixed_sigma = 256
213 };
214 enum : uint8_t
215 {
216 int_width = 8
217 }; // width of the input integers
218
219 std::vector<data_node> m_nodes; // nodes for the prefix code tree structure
220 node_type m_c_to_leaf[fixed_sigma]; // map symbol c to a leaf in the tree structure
221 // if m_c_to_leaf[c] == undef the char does
222 // not exists in the text
223 uint64_t m_path[fixed_sigma]; // path information for each char; the bits at position
224 // 0..55 hold path information; bits 56..63 the length
225 // of the path in binary representation
226
228 {}
229
230 _byte_tree(std::vector<pc_node> const & temp_nodes, uint64_t & bv_size, t_wt const *)
231 {
232 m_nodes.resize(temp_nodes.size());
233 m_nodes[0] = temp_nodes.back(); // insert root at index 0
234 bv_size = 0;
235 size_t node_cnt = 1;
236 node_type last_parent = undef;
237 std::deque<node_type> q;
238 q.push_back(0);
239 while (!q.empty())
240 {
241 node_type idx;
242 if (!t_dfs_shape)
243 {
244 idx = q.front();
245 q.pop_front();
246 }
247 else
248 {
249 idx = q.back();
250 q.pop_back();
251 }
252 // frq_sum is store in bv_pos value
253 uint64_t frq = m_nodes[idx].bv_pos;
254 m_nodes[idx].bv_pos = bv_size;
255 if (m_nodes[idx].child[0] != undef) // if node is not a leaf
256 bv_size += frq; // add frequency
257 if (idx > 0)
258 { // node is not the root
259 if (last_parent != m_nodes[idx].parent)
260 m_nodes[m_nodes[idx].parent].child[0] = idx;
261 else
262 m_nodes[m_nodes[idx].parent].child[1] = idx;
263 last_parent = m_nodes[idx].parent;
264 }
265 if (m_nodes[idx].child[0] != undef)
266 { // if node is not a leaf
267 for (uint32_t k = 0; k < 2; ++k)
268 { // add children to tree
269 m_nodes[node_cnt] = temp_nodes[m_nodes[idx].child[k]];
270 m_nodes[node_cnt].parent = idx;
271 q.push_back(node_cnt);
272 m_nodes[idx].child[k] = node_cnt++;
273 }
274 }
275 }
276 // initialize m_c_to_leaf
277 for (uint32_t i = 0; i < fixed_sigma; ++i)
278 m_c_to_leaf[i] = undef; // if c is not in the alphabet m_c_to_leaf[c] = undef
279 for (node_type v = 0; v < m_nodes.size(); ++v)
280 {
281 if (m_nodes[v].child[0] == undef) // if node is a leaf
282 m_c_to_leaf[(uint8_t)m_nodes[v].bv_pos_rank] = v; // calculate value
283 }
284 // initialize path information
285 // Note: In the case of a bfs search order,
286 // we can classify nodes as right child and left child with an easy criterion:
287 // node is a left child, if node%2==1
288 // node is a right child, if node%2==0
289 for (uint32_t c = 0, prev_c = 0; c < fixed_sigma; ++c)
290 {
291 if (m_c_to_leaf[c] != undef)
292 { // if char exists in the alphabet
293 node_type v = m_c_to_leaf[c];
294 uint64_t pw = 0; // path
295 uint64_t pl = 0; // path len
296 while (v != root())
297 { // while node is not the root
298 pw <<= 1;
299 if (m_nodes[m_nodes[v].parent].child[1] == v) // if the node is a right child
300 pw |= 1ULL;
301 ++pl;
302 v = m_nodes[v].parent; // go up the tree
303 }
304 if (pl > 56)
305 {
306 throw std::logic_error("Code depth greater than 56!!!");
307 }
308 m_path[c] = pw | (pl << 56);
309 prev_c = c;
310 }
311 else
312 {
313 uint64_t pl = 0; // len is 0, good for special case in rank
314 m_path[c] = prev_c | (pl << 56);
315 }
316 }
317 }
318
319 template <typename t_rank_type>
320 void init_node_ranks(t_rank_type const & rank)
321 {
322 for (uint64_t i = 0; i < m_nodes.size(); ++i)
323 {
324 if (m_nodes[i].child[0] != undef) // if node is not a leaf
325 m_nodes[i].bv_pos_rank = rank.rank(m_nodes[i].bv_pos);
326 }
327 }
328
330 {
331
332 for (uint32_t i = 0; i < fixed_sigma; ++i)
333 m_c_to_leaf[i] = bt.m_c_to_leaf[i];
334 for (uint32_t i = 0; i < fixed_sigma; ++i)
335 m_path[i] = bt.m_path[i];
336 }
337
339 {
340 if (this != &bt)
341 {
342 _byte_tree tmp(bt);
343 *this = std::move(tmp);
344 }
345 return *this;
346 }
347
349 {
350 if (this != &bt)
351 {
352 m_nodes = std::move(bt.m_nodes);
353 for (uint32_t i = 0; i < fixed_sigma; ++i)
354 m_c_to_leaf[i] = bt.m_c_to_leaf[i];
355 for (uint32_t i = 0; i < fixed_sigma; ++i)
356 m_path[i] = bt.m_path[i];
357 }
358 return *this;
359 }
360
362 uint64_t serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
363 {
364 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
365 uint64_t written_bytes = 0;
366 uint64_t m_nodes_size = m_nodes.size();
367 written_bytes += write_member(m_nodes_size, out, child, "m_nodes.size()");
368 written_bytes += serialize_vector(m_nodes, out, child, "m_nodes");
369 out.write((char *)m_c_to_leaf, fixed_sigma * sizeof(m_c_to_leaf[0]));
370 written_bytes += fixed_sigma * sizeof(m_c_to_leaf[0]); // bytes from previous loop
371 out.write((char *)m_path, fixed_sigma * sizeof(m_path[0]));
372 written_bytes += fixed_sigma * sizeof(m_path[0]); // bytes from previous loop
373 structure_tree::add_size(child, written_bytes);
374 return written_bytes;
375 }
376
378 void load(std::istream & in)
379 {
380 uint64_t m_nodes_size = 0;
381 read_member(m_nodes_size, in);
382 m_nodes = std::vector<data_node>(m_nodes_size);
383 load_vector(m_nodes, in);
384 in.read((char *)m_c_to_leaf, fixed_sigma * sizeof(m_c_to_leaf[0]));
385 in.read((char *)m_path, fixed_sigma * sizeof(m_path[0]));
386 }
387
388 template <typename archive_t>
389 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
390 {
391 ar(CEREAL_NVP(m_nodes));
393 ar(CEREAL_NVP(m_path));
394 }
395
396 template <typename archive_t>
397 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
398 {
399 ar(CEREAL_NVP(m_nodes));
401 ar(CEREAL_NVP(m_path));
402 }
403
405 bool operator==(_byte_tree const & other) const noexcept
406 {
407 return (m_nodes == other.m_nodes) /* && (m_c_to_leaf == other.m_c_to_leaf) &&
408 (m_path == other.m_path)*/
409 ;
410 }
411
413 bool operator!=(_byte_tree const & other) const noexcept
414 {
415 return !(*this == other);
416 }
417
420 {
421 return m_c_to_leaf[c];
422 }
424 static inline node_type root()
425 {
426 return 0;
427 }
428
430 uint64_t size() const
431 {
432 return m_nodes.size();
433 }
434
436 inline node_type parent(node_type v) const
437 {
438 return m_nodes[v].parent;
439 }
441 inline node_type child(node_type v, uint8_t i) const
442 {
443 return m_nodes[v].child[i];
444 }
445
447 inline bool is_leaf(node_type v) const
448 {
449 return m_nodes[v].child[0] == undef;
450 }
451
453 inline uint64_t size(node_type v) const
454 {
455 auto next_v = t_dfs_shape ? m_nodes[v].child[0] : v + 1;
456 return bv_pos(next_v) - bv_pos(v);
457 }
458
460 inline uint64_t bit_path(value_type c) const
461 {
462 return m_path[c];
463 }
464
466 inline uint64_t bv_pos(node_type v) const
467 {
468 return m_nodes[v].bv_pos;
469 }
470
472 inline uint64_t bv_pos_rank(node_type v) const
473 {
474 return m_nodes[v].bv_pos_rank;
475 }
476
478 inline bool is_valid(node_type v) const
479 {
480 return v != undef;
481 }
482
484 inline std::pair<bool, value_type> symbol_gte(value_type c) const
485 {
486 for (uint32_t i = c; i < fixed_sigma; i++)
487 {
488 if (m_c_to_leaf[i] != undef)
489 {
490 return {true, i};
491 }
492 }
493 return {false, 0};
494 }
495
497 inline std::pair<bool, value_type> symbol_lte(value_type c) const
498 {
499 for (uint32_t i = c; i > 0; i--)
500 {
501 if (m_c_to_leaf[i] != undef)
502 {
503 return {true, i};
504 }
505 }
506 if (m_c_to_leaf[0] != undef)
507 return {true, 0};
508 return {false, 0};
509 }
510};
511
512// Strategy class for tree representation of a WT
513template <bool t_dfs_shape = false>
515{
516 template <typename t_wt>
518};
519
520// Strategy class for tree representation of a WT
521template <bool t_dfs_shape, typename t_wt>
523{
525 using value_type = uint64_t;
526 using node_type = uint64_t; // node is represented by index in m_nodes
528 enum : uint64_t
529 {
530 undef = 0xFFFFFFFFFFFFFFFFULL
531 }; // max uint64_t value
532 enum : uint8_t
533 {
534 int_width = 0
535 }; // width of the input integers is variable
536
537 std::vector<data_node> m_nodes; // nodes for the prefix code tree structure
538 std::vector<node_type> m_c_to_leaf; // map symbol c to a leaf in the tree structure
539 // if m_c_to_leaf[c] == undef the char does
540 // not exists in the text
541 std::vector<uint64_t> m_path; // path information for each char; the bits at position
542 // 0..55 hold path information; bits 56..63 the length
543 // of the path in binary representation
544
545 _int_tree() = default;
546
547 _int_tree(std::vector<pc_node> const & temp_nodes, uint64_t & bv_size, t_wt const *)
548 {
549 m_nodes.resize(temp_nodes.size());
550 m_nodes[0] = temp_nodes.back(); // insert root at index 0
551 bv_size = 0;
552 size_t node_cnt = 1;
553 node_type last_parent = undef;
554 std::deque<node_type> q;
555 q.push_back(0);
556 uint64_t max_c = 0;
557 while (!q.empty())
558 {
559 node_type idx;
560 if (!t_dfs_shape)
561 {
562 idx = q.front();
563 q.pop_front();
564 }
565 else
566 {
567 idx = q.back();
568 q.pop_back();
569 }
570 // frq_sum is store in bv_pos value
571 uint64_t frq = m_nodes[idx].bv_pos;
572 m_nodes[idx].bv_pos = bv_size;
573 if (m_nodes[idx].child[0] != undef)
574 { // if node is not a leaf
575 bv_size += frq; // add frequency
576 }
577 else if (max_c < m_nodes[idx].bv_pos_rank)
578 { // node is leaf and contains large symbol
579 max_c = m_nodes[idx].bv_pos_rank;
580 }
581 if (idx > 0)
582 { // node is not the root
583 if (last_parent != m_nodes[idx].parent)
584 m_nodes[m_nodes[idx].parent].child[0] = idx;
585 else
586 m_nodes[m_nodes[idx].parent].child[1] = idx;
587 last_parent = m_nodes[idx].parent;
588 }
589 if (m_nodes[idx].child[0] != undef)
590 { // if node is not a leaf
591 for (uint32_t k = 0; k < 2; ++k)
592 { // add children to tree
593 m_nodes[node_cnt] = temp_nodes[m_nodes[idx].child[k]];
594 m_nodes[node_cnt].parent = idx;
595 q.push_back(node_cnt);
596 m_nodes[idx].child[k] = node_cnt++;
597 }
598 }
599 }
600 // initialize m_c_to_leaf
601 // if c is not in the alphabet m_c_to_leaf[c] = undef
602 m_c_to_leaf.resize(max_c + 1, undef);
603 for (node_type v = 0; v < m_nodes.size(); ++v)
604 {
605 if (m_nodes[v].child[0] == undef)
606 { // if node is a leaf
607 uint64_t c = m_nodes[v].bv_pos_rank;
608 m_c_to_leaf[c] = v; // calculate value
609 if (c > max_c)
610 max_c = c;
611 }
612 }
613 m_path = std::vector<uint64_t>(m_c_to_leaf.size(), 0);
614 // initialize path information
615 // Note: In the case of a bfs search order,
616 // we can classify nodes as right child and left child with an easy criterion:
617 // node is a left child, if node%2==1
618 // node is a right child, if node%2==0
619 for (value_type c = 0, prev_c = 0; c < m_c_to_leaf.size(); ++c)
620 {
621 if (m_c_to_leaf[c] != undef)
622 { // if char exists in the alphabet
623 node_type v = m_c_to_leaf[c];
624 uint64_t w = 0; // path
625 uint64_t l = 0; // path len
626 while (v != root())
627 { // while node is not the root
628 w <<= 1;
629 if (m_nodes[m_nodes[v].parent].child[1] == v) // if the node is a right child
630 w |= 1ULL;
631 ++l;
632 v = m_nodes[v].parent; // go up the tree
633 }
634 if (l > 56)
635 {
636 throw std::logic_error("Code depth greater than 56!!!");
637 }
638 m_path[c] = w | (l << 56);
639 prev_c = c;
640 }
641 else
642 {
643 uint64_t pl = 0; // len is 0, good for special case in rank
644 m_path[c] = prev_c | (pl << 56);
645 }
646 }
647 }
648
649 template <typename t_rank_type>
650 void init_node_ranks(t_rank_type const & rank)
651 {
652 for (uint64_t i = 0; i < m_nodes.size(); ++i)
653 {
654 if (m_nodes[i].child[0] != undef) // if node is not a leaf
655 m_nodes[i].bv_pos_rank = rank.rank(m_nodes[i].bv_pos);
656 }
657 }
658
659 _int_tree(_int_tree const & bt) = default;
660 _int_tree(_int_tree && bt) = default;
661
662 _int_tree & operator=(_int_tree const & bt) = default;
663 _int_tree & operator=(_int_tree && bt) = default;
664
666 uint64_t serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
667 {
668 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
669 uint64_t written_bytes = 0;
670 uint64_t m_nodes_size = m_nodes.size();
671 written_bytes += write_member(m_nodes_size, out, child, "m_nodes.size()");
672 written_bytes += serialize_vector(m_nodes, out, child, "m_nodes");
673 uint64_t m_c_to_leaf_size = m_c_to_leaf.size();
674 written_bytes += write_member(m_c_to_leaf_size, out, child, "m_c_to_leaf.size()");
675 written_bytes += serialize_vector(m_c_to_leaf, out, child, "m_c_to_leaf");
676 uint64_t m_path_size = m_path.size();
677 written_bytes += write_member(m_path_size, out, child, "m_path.size()");
678 written_bytes += serialize_vector(m_path, out, child, "m_path");
679 structure_tree::add_size(child, written_bytes);
680 return written_bytes;
681 }
682
684 void load(std::istream & in)
685 {
686 uint64_t m_nodes_size = 0;
687 read_member(m_nodes_size, in);
688 m_nodes = std::vector<data_node>(m_nodes_size);
689 load_vector(m_nodes, in);
690 uint64_t m_c_to_leaf_size = 0;
691 read_member(m_c_to_leaf_size, in);
692 m_c_to_leaf = std::vector<node_type>(m_c_to_leaf_size);
694 uint64_t m_path_size = 0;
695 read_member(m_path_size, in);
696 m_path = std::vector<uint64_t>(m_path_size);
697 load_vector(m_path, in);
698 }
699
701 bool operator==(_int_tree const & other) const noexcept
702 {
703 return (m_nodes == other.m_nodes) && (m_c_to_leaf == other.m_c_to_leaf) && (m_path == other.m_path);
704 }
705
707 bool operator!=(_int_tree const & other) const noexcept
708 {
709 return !(*this == other);
710 }
711
712 template <typename archive_t>
713 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
714 {
715 ar(CEREAL_NVP(m_nodes));
717 ar(CEREAL_NVP(m_path));
718 }
719
720 template <typename archive_t>
721 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
722 {
723 ar(CEREAL_NVP(m_nodes));
725 ar(CEREAL_NVP(m_path));
726 }
727
730 {
731 if (c >= m_c_to_leaf.size())
732 return undef;
733 else
734 return m_c_to_leaf[c];
735 }
737 static inline node_type root()
738 {
739 return 0;
740 }
741
743 uint64_t size() const
744 {
745 return m_nodes.size();
746 }
747
749 inline node_type parent(node_type v) const
750 {
751 return m_nodes[v].parent;
752 }
754 inline node_type child(node_type v, uint8_t i) const
755 {
756 return m_nodes[v].child[i];
757 }
758
760 inline bool is_leaf(node_type v) const
761 {
762 return m_nodes[v].child[0] == undef;
763 }
764
766 inline uint64_t size(node_type v) const
767 {
768 auto next_v = t_dfs_shape ? m_nodes[v].child[0] : v + 1;
769 return bv_pos(next_v) - bv_pos(v);
770 }
771
773 inline uint64_t bit_path(value_type c) const
774 {
775 if (c >= m_path.size())
776 {
777 return m_path.size() - 1;
778 }
779 return m_path[c];
780 }
781
783 inline uint64_t bv_pos(node_type v) const
784 {
785 return m_nodes[v].bv_pos;
786 }
787
789 inline uint64_t bv_pos_rank(node_type v) const
790 {
791 return m_nodes[v].bv_pos_rank;
792 }
793
795 inline bool is_valid(node_type v) const
796 {
797 return v != undef;
798 }
799
801 inline std::pair<bool, value_type> symbol_gte(value_type c) const
802 {
803 if (c >= m_c_to_leaf.size())
804 {
805 return {false, 0};
806 }
807 for (value_type i = c; i < m_c_to_leaf.size(); i++)
808 {
809 if (m_c_to_leaf[i] != undef)
810 {
811 return {true, i};
812 }
813 }
814 return {false, 0};
815 }
816
818 inline std::pair<bool, value_type> symbol_lte(value_type c) const
819 {
820 if (c >= m_c_to_leaf.size())
821 {
822 // return the largest symbol
823 c = m_c_to_leaf.size() - 1;
824 }
825 for (value_type i = c; i > 0; i--)
826 {
827 if (m_c_to_leaf[i] != undef)
828 {
829 return {true, i};
830 }
831 }
832 if (m_c_to_leaf[0] != undef)
833 return {true, 0};
834 return {false, 0};
835 }
836};
837
838// Strategy class for tree representation of a WT
839template <bool t_dfs_shape = false>
841{
842 template <typename t_wt>
844};
845
846template <typename t_bv>
848{
849public:
850 typedef typename t_bv::value_type value_type;
851 typedef typename t_bv::size_type size_type;
852 typedef typename t_bv::difference_type difference_type;
853 typedef typename t_bv::const_iterator iterator;
854
855private:
856 iterator m_begin, m_end;
857
858public:
859 node_bv_container(iterator b, iterator e) : m_begin(b), m_end(e)
860 {}
862 {
863 return *(m_begin + i);
864 }
866 {
867 return m_end - m_begin;
868 }
870 {
871 return m_begin;
872 }
873 iterator end() const
874 {
875 return m_end;
876 }
877};
878
879template <typename t_bv>
881{
882public:
883 typedef typename t_bv::value_type value_type;
884 typedef typename t_bv::size_type size_type;
885 typedef typename t_bv::difference_type difference_type;
886 typedef typename t_bv::const_iterator iterator;
887
888private:
889 iterator m_begin, m_end;
890
891public:
892 node_seq_container(iterator b, iterator e) : m_begin(b), m_end(e)
893 {}
895 {
896 return *(m_begin + i);
897 }
899 {
900 return m_end - m_begin;
901 }
903 {
904 return m_begin;
905 }
906 iterator end() const
907 {
908 return m_end;
909 }
910};
911
912inline bool empty(range_type const & r)
913{
914 return std::get<0>(r) == (std::get<1>(r) + 1);
915}
916
918{
919 return std::get<1>(r) - std::get<0>(r) + 1;
920}
921
922inline pc_node::pc_node(uint64_t freq, uint64_t sym, uint64_t parent, uint64_t child_left, uint64_t child_right) :
923 freq(freq),
924 sym(sym),
925 parent(parent)
926{
927 child[0] = child_left;
928 child[1] = child_right;
929}
930
931} // end namespace sdsl
932#endif
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
int_vector_size_type size_type
size_type size() const
iterator begin() const
node_bv_container(iterator b, iterator e)
t_bv::size_type size_type
t_bv::const_iterator iterator
value_type operator[](size_type i) const
iterator end() const
t_bv::value_type value_type
t_bv::difference_type difference_type
iterator end() const
t_bv::size_type size_type
value_type operator[](size_type i) const
node_seq_container(iterator b, iterator e)
iterator begin() const
t_bv::difference_type difference_type
t_bv::value_type value_type
t_bv::const_iterator iterator
size_type size() const
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
Namespace for the succinct data structure library.
void calculate_effective_alphabet_size(t_rac const &C, sigma_type &sigma)
Definition wt_helper.hpp:62
void load_vector(std::vector< T > &, std::istream &)
Load all elements of a vector from a input stream.
Definition io.hpp:425
void calculate_character_occurences(t_it begin, t_it end, t_rac &C)
Count for each character the number of occurrences in rac[0..size-1].
Definition wt_helper.hpp:47
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition io.hpp:94
void read_member(T &t, std::istream &in)
Definition io.hpp:119
uint64_t serialize_vector(std::vector< T > const &, std::ostream &, sdsl::structure_tree_node *v=nullptr, std::string="")
Serialize each element of an std::vector.
Definition io.hpp:397
std::array< int_vector<>::size_type, 2 > range_type
Definition wt_helper.hpp:27
bool empty(range_type const &r)
Empty range check.
int_vector ::size_type size(range_type const &r)
Size of a range.
std::vector< range_type > range_vec_type
Definition wt_helper.hpp:28
Contains declarations and definitions of data structure concepts.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
uint64_t bv_pos(node_type v) const
Return the start of the node in the WT's bit vector.
_byte_tree & operator=(_byte_tree const &bt)
node_type m_c_to_leaf[fixed_sigma]
_byte_tree(_byte_tree const &bt)
uint64_t bv_pos_rank(node_type v) const
Returns for node v the rank of 1's up to bv_pos(v)
bool is_valid(node_type v) const
Return if the node is a valid node.
_byte_tree & operator=(_byte_tree &&bt)
std::pair< bool, value_type > symbol_lte(value_type c) const
Return symbol c or the next smaller symbol in the wt.
static node_type root()
Return the root node of the tree.
uint64_t bit_path(value_type c) const
Return the path as left/right bit sequence in a uint64_t.
node_type child(node_type v, uint8_t i) const
Return left (i=0) or right (i=1) child node of v.
void init_node_ranks(t_rank_type const &rank)
bool operator==(_byte_tree const &other) const noexcept
Equality operator.
node_type parent(node_type v) const
Return the parent node of v.
uint64_t size(node_type v) const
Return size of an inner node.
uint64_t size() const
Return the number of nodes in the tree.
uint64_t m_path[fixed_sigma]
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
bool operator!=(_byte_tree const &other) const noexcept
Inequality operator.
bool is_leaf(node_type v) const
Return if v is a leaf node.
std::vector< data_node > m_nodes
node_type c_to_leaf(value_type c) const
Get corresponding leaf for symbol c.
void load(std::istream &in)
Loads the data structure from the given istream.
_byte_tree(std::vector< pc_node > const &temp_nodes, uint64_t &bv_size, t_wt const *)
std::pair< bool, value_type > symbol_gte(value_type c) const
Return symbol c or the next larger symbol in the wt.
uint64_t serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
_int_tree(_int_tree &&bt)=default
uint64_t bv_pos_rank(node_type v) const
Returns for node v the rank of 1's up to bv_pos(v)
std::vector< data_node > m_nodes
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
void load(std::istream &in)
Loads the data structure from the given istream.
uint64_t bit_path(value_type c) const
Return the path as left/right bit sequence in a uint64_t.
static node_type root()
Return the root node of the tree.
_int_tree()=default
bool operator!=(_int_tree const &other) const noexcept
Inequality operator.
uint64_t value_type
uint64_t bv_pos(node_type v) const
Return the start of the node in the WT's bit vector.
node_type child(node_type v, uint8_t i) const
Return left (i=0) or right (i=1) child node of v.
_int_tree & operator=(_int_tree const &bt)=default
_int_tree(std::vector< pc_node > const &temp_nodes, uint64_t &bv_size, t_wt const *)
uint64_t size(node_type v) const
Return size of an inner node.
node_type parent(node_type v) const
Return the parent node of v.
bool is_leaf(node_type v) const
Return if v is a leaf node.
std::vector< uint64_t > m_path
_int_tree(_int_tree const &bt)=default
node_type c_to_leaf(value_type c) const
Get corresponding leaf for symbol c.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
bool operator==(_int_tree const &other) const noexcept
Equality operator.
std::pair< bool, value_type > symbol_gte(value_type c) const
Return symbol c or the next larger symbol in the wt.
void init_node_ranks(t_rank_type const &rank)
uint64_t node_type
std::pair< bool, value_type > symbol_lte(value_type c) const
Return symbol c or the next smaller symbol in the wt.
uint64_t size() const
Return the number of nodes in the tree.
std::vector< node_type > m_c_to_leaf
_int_tree & operator=(_int_tree &&bt)=default
uint64_t serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
bool is_valid(node_type v) const
Return if the node is a valid node.
bool operator==(_node const &other) const noexcept
Equality operator.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
uint64_t size_type
Definition wt_helper.hpp:95
uint64_t bv_pos_rank
Definition wt_helper.hpp:97
uint64_t bv_pos
Definition wt_helper.hpp:96
node_type child[2]
Definition wt_helper.hpp:99
_node & operator=(pc_node const &v)
bool operator!=(_node const &other) const noexcept
Inequality operator.
_node(_node const &)=default
node_type parent
Definition wt_helper.hpp:98
_node(uint64_t bv_pos=0, uint64_t bv_pos_rank=0, node_type parent=t_tree_strat_fat::undef, node_type child_left=t_tree_strat_fat::undef, node_type child_right=t_tree_strat_fat::undef)
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_node & operator=(_node const &v)
typename t_tree_strat_fat::node_type node_type
Definition wt_helper.hpp:94
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void load(std::istream &in)
uint64_t freq
Definition wt_helper.hpp:74
uint64_t parent
Definition wt_helper.hpp:76
uint64_t child[2]
Definition wt_helper.hpp:77
uint64_t sym
Definition wt_helper.hpp:75
pc_node(uint64_t freq=0, uint64_t sym=0, uint64_t parent=undef, uint64_t child_left=undef, uint64_t child_right=undef)
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.