9#ifndef INCLUDED_SDSL_WT_PC
10#define INCLUDED_SDSL_WT_PC
53template <
class t_shape,
55 class t_rank =
typename t_bitvector::rank_1_type,
56 class t_select =
typename t_bitvector::select_1_type,
57 class t_select_zero =
typename t_bitvector::select_0_type,
58 class t_tree_strat = byte_tree<>>
74 typedef typename t_shape::template type<wt_pc>
shape_type;
79 using node_type =
typename tree_strat_type::node_type;
99 uint64_t p = m_tree.bit_path(old_chr);
100 uint32_t path_len = p >> 56;
102 for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
106 bv.set_int(bv_node_pos[v], 0xFFFFFFFFFFFFFFFFULL, times);
108 bv_node_pos[v] += times;
109 v = m_tree.child(v, p & 1);
114 size_type construct_tree_shape(std::vector<size_type>
const & C)
117 std::vector<pc_node> temp_nodes;
118 shape_type::construct_tree(C, temp_nodes);
126 void construct_init_rank_select()
128 util::init_support(m_bv_rank, &m_bv);
129 util::init_support(m_bv_select0, &m_bv);
130 util::init_support(m_bv_select1, &m_bv);
137 std::vector<value_type> & cs,
138 std::vector<size_type> & rank_c_i,
139 std::vector<size_type> & rank_c_j,
143 size_type i_new = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
144 size_type j_new = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
151 if (!m_tree.is_leaf(v_new))
153 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, v_new);
159 cs[k++] = m_tree.bv_pos_rank(v_new);
166 if (!m_tree.is_leaf(v_new))
168 _interval_symbols(i_new, j_new, k, cs, rank_c_i, rank_c_j, v_new);
174 cs[k++] = m_tree.bv_pos_rank(v_new);
193 template <
typename t_it>
202 std::vector<size_type> C;
208 size_type tree_size = construct_tree_shape(C);
213 std::vector<uint64_t> bv_node_pos(m_tree.size(), 0);
214 for (
size_type v = 0; v < m_tree.size(); ++v)
216 bv_node_pos[v] = m_tree.bv_pos(v);
220 for (
auto it =
begin; it !=
end; ++it)
225 insert_char(old_chr, bv_node_pos, times, temp_bv);
234 insert_char(old_chr, bv_node_pos, times, temp_bv);
241 insert_char(old_chr, bv_node_pos, times, temp_bv);
245 construct_init_rank_select();
247 m_tree.init_node_ranks(m_bv_rank);
250 template <
typename t_it>
259 m_bv_rank(wt.m_bv_rank),
260 m_bv_select1(wt.m_bv_select1),
261 m_bv_select0(wt.m_bv_select0),
264 m_bv_rank.set_vector(&m_bv);
265 m_bv_select1.set_vector(&m_bv);
266 m_bv_select0.set_vector(&m_bv);
272 m_bv(std::move(wt.m_bv)),
273 m_bv_rank(std::move(wt.m_bv_rank)),
274 m_bv_select1(std::move(wt.m_bv_select1)),
275 m_bv_select0(std::move(wt.m_bv_select0)),
276 m_tree(std::move(wt.m_tree))
278 m_bv_rank.set_vector(&m_bv);
279 m_bv_select1.set_vector(&m_bv);
280 m_bv_select0.set_vector(&m_bv);
289 *
this = std::move(tmp);
300 m_sigma = wt.m_sigma;
301 m_bv = std::move(wt.m_bv);
302 m_bv_rank = std::move(wt.m_bv_rank);
303 m_bv_rank.set_vector(&m_bv);
304 m_bv_select1 = std::move(wt.m_bv_select1);
305 m_bv_select1.set_vector(&m_bv);
306 m_bv_select0 = std::move(wt.m_bv_select0);
307 m_bv_select0.set_vector(&m_bv);
308 m_tree = std::move(wt.m_tree);
342 while (!m_tree.is_leaf(v))
344 if (m_bv[m_tree.bv_pos(v) + i])
346 i = m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v);
347 v = m_tree.child(v, 1);
351 i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
352 v = m_tree.child(v, 0);
356 return m_tree.bv_pos_rank(v);
374 if (!m_tree.is_valid(m_tree.c_to_leaf(c)))
382 uint64_t p = m_tree.bit_path(c);
383 uint32_t path_len = (p >> 56);
386 for (uint32_t l = 0; l < path_len and result; ++l, p >>= 1)
390 result = (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v));
394 result -= (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v));
396 v = m_tree.child(v, p & 1);
415 while (!m_tree.is_leaf(v))
417 if (m_bv[m_tree.bv_pos(v) + i])
419 i = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
420 v = m_tree.child(v, 1);
424 i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
425 v = m_tree.child(v, 0);
429 return std::make_pair(i, (
value_type)m_tree.bv_pos_rank(v));
445 assert(1 <= i and i <=
rank(
size(), c));
447 if (!m_tree.is_valid(v))
453 return std::min(i - 1, m_size);
456 uint64_t p = m_tree.bit_path(c);
457 uint32_t path_len = (p >> 56);
459 p <<= (64 - path_len);
460 for (uint32_t l = 0; l < path_len; ++l, p <<= 1)
462 if ((p & 0x8000000000000000ULL) == 0)
464 v = m_tree.parent(v);
465 result = m_bv_select0(m_tree.bv_pos(v) - m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
469 v = m_tree.parent(v);
470 result = m_bv_select1(m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
501 std::vector<value_type> & cs,
502 std::vector<size_type> & rank_c_i,
503 std::vector<size_type> & rank_c_j)
const
505 assert(i <= j and j <=
size());
510 else if (1 == m_sigma)
513 cs[0] = m_tree.bv_pos_rank(m_tree.root());
514 rank_c_i[0] = std::min(i, m_size);
515 rank_c_j[0] = std::min(j, m_size);
517 else if ((j - i) == 1)
521 rank_c_i[0] = rc.first;
523 rank_c_j[0] = rank_c_i[0] + 1;
525 else if ((j - i) == 2)
528 rank_c_i[0] = rc.first;
531 rank_c_i[1] = rc.first;
537 rank_c_j[0] = rank_c_i[0] + 2;
544 std::swap(cs[0], cs[1]);
545 std::swap(rank_c_i[0], rank_c_i[1]);
547 rank_c_j[0] = rank_c_i[0] + 1;
548 rank_c_j[1] = rank_c_i[1] + 1;
554 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0);
573 template <
class t_ret_type = std::tuple<
size_type,
size_type,
size_type>>
574 typename std::enable_if<shape_type::lex_ordered, t_ret_type>::type
577 assert(i <= j and j <=
size());
580 value_type _c = m_tree.bv_pos_rank(m_tree.root());
583 return t_ret_type{i, 0, 0};
587 return t_ret_type{0, 0, j - i};
591 return t_ret_type{0, j - i, 0};
596 return t_ret_type{
rank(i, c), 0, 0};
598 uint64_t p = m_tree.bit_path(c);
599 uint32_t path_len = p >> 56;
605 return t_ret_type{0, 0, j - i};
608 return t_ret_type{0, j - i - std::get<2>(res), std::get<2>(res)};
612 for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
614 size_type r1_1 = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
615 size_type r1_2 = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
619 smaller += j - r1_2 - i + r1_1;
625 greater += r1_2 - r1_1;
629 v = m_tree.child(v, p & 1);
631 return t_ret_type{i, smaller, greater};
646 template <
class t_ret_type = std::tuple<
size_type,
size_type>>
653 value_type _c = m_tree.bv_pos_rank(m_tree.root());
656 return t_ret_type{i, 0};
660 return t_ret_type{0, 0};
664 return t_ret_type{0, i};
668 uint64_t p = m_tree.bit_path(c);
669 uint32_t path_len = p >> 56;
675 return t_ret_type{0, 0};
678 return t_ret_type{0, std::get<0>(res) + std::get<1>(res)};
683 for (uint32_t l = 0; l < path_len and all; ++l, p >>= 1)
685 size_type ones = (m_bv_rank(m_tree.bv_pos(v) + all) - m_tree.bv_pos_rank(v));
688 result += all - ones;
695 v = m_tree.child(v, p & 1);
697 return t_ret_type{all, result};
717 written_bytes +=
write_member(m_size, out, child,
"size");
718 written_bytes +=
write_member(m_sigma, out, child,
"sigma");
719 written_bytes += m_bv.serialize(out, child,
"bv");
720 written_bytes += m_bv_rank.serialize(out, child,
"bv_rank");
721 written_bytes += m_bv_select1.serialize(out, child,
"bv_select_1");
722 written_bytes += m_bv_select0.serialize(out, child,
"bv_select_0");
723 written_bytes += m_tree.serialize(out, child,
"tree");
725 return written_bytes;
734 m_bv_rank.load(in, &m_bv);
735 m_bv_select1.load(in, &m_bv);
736 m_bv_select0.load(in, &m_bv);
743 return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_bv == other.m_bv)
744 && (m_bv_rank == other.m_bv_rank) && (m_bv_select1 == other.m_bv_select1)
745 && (m_bv_select0 == other.m_bv_select0) && (m_tree == other.m_tree);
751 return !(*
this == other);
754 template <
typename archive_t>
766 template <
typename archive_t>
773 m_bv_rank.set_vector(&m_bv);
775 m_bv_select1.set_vector(&m_bv);
777 m_bv_select0.set_vector(&m_bv);
798 bool bit = *(
begin(vv) + i);
799 i = std::get<1>(rs[bit]);
810 return m_tree.is_leaf(v);
816 return m_tree.bv_pos_rank(v);
834 auto parent = m_tree.parent(v);
836 if (m_tree.child(parent, 0) == v)
837 return std::get<1>(std::get<0>(rs)) - std::get<0>((std::get<0>(rs))) + 1;
839 return std::get<1>(std::get<1>(rs)) - std::get<0>((std::get<1>(rs))) + 1;
844 return m_tree.size(v);
851 return m_tree.root();
861 return {{m_tree.child(v, 0), m_tree.child(v, 1)}};
876 auto ranges_copy = ranges;
877 return expand(v, std::move(ranges_copy));
892 auto v_sp_rank = m_tree.bv_pos_rank(v);
895 for (
auto & r : ranges)
897 auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
898 auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
899 auto left_size = (r[1] - r[0] + 1) - right_size;
901 auto right_sp = sp_rank - v_sp_rank;
902 auto left_sp = r[0] - right_sp;
904 r = {{left_sp, left_sp + left_size - 1}};
905 res[i++] = {{right_sp, right_sp + right_size - 1}};
907 return {{ranges, std::move(res)}};
922 auto v_sp_rank = m_tree.bv_pos_rank(v);
923 auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
924 auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
925 auto left_size = (r[1] - r[0] + 1) - right_size;
927 auto right_sp = sp_rank - v_sp_rank;
928 auto left_sp = r[0] - right_sp;
930 return {{{{left_sp, left_sp + left_size - 1}}, {{right_sp, right_sp + right_size - 1}}}};
936 uint64_t
path = m_tree.bit_path(c);
937 uint64_t path_len =
path >> 56;
941 return {path_len,
path};
952 return m_tree.symbol_gte(c);
963 return m_tree.symbol_lte(c);
968 auto begin(
node_type const & v)
const ->
decltype(m_bv.begin() + m_tree.bv_pos(v))
970 return m_bv.begin() + m_tree.bv_pos(v);
974 auto end(
node_type const & v)
const ->
decltype(m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v))
976 return m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v);
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
int_vector_size_type size_type
Generic iterator for a random access container.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A prefix code-shaped wavelet.
value_type sym(node_type const &v) const
Symbol for a leaf.
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
wt_pc & operator=(wt_pc &&wt)
Move assignment operator.
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
auto bit_vec(node_type const &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
t_bitvector::difference_type difference_type
std::pair< bool, value_type > symbol_gte(value_type c) const
Returns for a symbol c the next larger or equal symbol in the WT.
void load(std::istream &in)
Loads the data structure from the given istream.
std::array< range_type, 2 > expand(node_type const &v, range_type const &r) const
Returns for a range its left and right child ranges.
int_vector ::size_type size_type
wt_pc & operator=(wt_pc const &wt)
Assignment operator.
auto seq(node_type const &v) const -> random_access_container< std::function< value_type(size_type)> >
Random access container to sequence of node v.
bit_vector_type const & bv
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
tree_strat_type::alphabet_category alphabet_category
t_bitvector bit_vector_type
wt_pc(t_it begin, t_it end, std::string)
std::array< range_vec_type, 2 > expand(node_type const &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
const_iterator begin() const
Returns a const_iterator to the first element.
size_type size() const
Returns the size of the original vector.
typename tree_strat_type::node_type node_type
node_type root() const
Returns the root node.
bool operator==(wt_pc const &other) const noexcept
Equality operator.
auto size(node_type const &v) const -> decltype(m_tree.size(v))
Return the size of node v.
std::array< range_vec_type, 2 > expand(node_type const &v, range_vec_type const &ranges) const
Returns for each range its left and right child ranges.
const_iterator end() const
Returns a const_iterator to the element after the last element.
std::pair< bool, value_type > symbol_lte(value_type c) const
Returns for a symbol c the previous smaller or equal symbol in the WT.
wt_pc(t_it begin, t_it end)
Construct the wavelet tree from a sequence defined by two interators.
t_select_zero select_0_type
t_tree_strat::template type< wt_pc > tree_strat_type
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many times symbol wt[i] occurs in the prefix [0..i-1].
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
tree_strat_type::value_type value_type
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
bool empty() const
Returns whether the wavelet tree contains no data.
t_shape::template type< wt_pc > shape_type
bool operator!=(wt_pc const &other) const noexcept
Inequality operator.
bool is_leaf(node_type const &v) const
Checks if the node is a leaf node.
size_type select(size_type i, value_type c) const
Calculates the ith occurrence of the symbol c in the supported vector.
std::array< node_type, 2 > expand(node_type const &v) const
Returns the two child nodes of an inner node.
random_access_const_iterator< wt_pc > const_iterator
wt_pc(wt_pc const &wt)
Copy constructor.
bool empty(node_type const &v) const
Indicates if node v is empty.
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
void calculate_effective_alphabet_size(t_rac const &C, sigma_type &sigma)
void calculate_character_occurences(t_it begin, t_it end, t_rac &C)
Count for each character the number of occurrences in rac[0..size-1].
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
std::array< int_vector<>::size_type, 2 > range_type
std::vector< range_type > range_vec_type
rank_support_v.hpp contains rank_support_v.
Contains declarations and definitions of data structure concepts.
static constexpr uint64_t rev(uint64_t x)
reverses a given 64 bit word
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.