10#ifndef INCLUDED_SDSL_INT_WAVELET_TREE
11#define INCLUDED_SDSL_INT_WAVELET_TREE
58 class t_rank =
typename t_bitvector::rank_1_type,
59 class t_select =
typename t_bitvector::select_1_type,
60 class t_select_zero =
typename t_bitvector::select_0_type>
98 std::vector<value_type> & cs,
99 std::vector<size_type> & rank_c_i,
100 std::vector<size_type> & rank_c_j,
122 if ((j - i) - (ones_before_j - ones_before_i) > 0)
125 size_type new_node_size = node_size - ones_before_end;
128 _interval_symbols(new_i, new_j, k, cs, rank_c_i, rank_c_j, level + 1,
path << 1, new_node_size, new_offset);
132 if ((ones_before_j - ones_before_i) > 0)
134 size_type new_offset = offset + (node_size - ones_before_end) +
m_size;
135 size_type new_node_size = ones_before_end;
138 _interval_symbols(new_i,
168 template <
typename t_it>
176 for (
auto it =
begin; it !=
end; ++it)
179 if (value > max_elem)
183 std::string tree_out_buf_file_name =
tmp_file(tmp_dir,
"_m_tree");
190 osfstream tree_out_buf(tree_out_buf_file_name, std::ios::binary | std::ios::trunc | std::ios::out);
196 uint64_t tree_word = 0;
202 const uint64_t mask_new = 1ULL << (
m_max_level - k - 1);
208 uint64_t start_value = (rac[i] & mask_old);
210 while (i <
m_size and ((x = rac[i]) & mask_old) == start_value)
214 tree_word |= (1ULL << (tree_pos & 0x3FULL));
219 rac[start + cnt0++] = x;
222 if ((tree_pos & 0x3FULL) == 0)
224 tree_out_buf.write((
char *)&tree_word,
sizeof(tree_word));
233 rac[start + cnt0 + j] = buf1[j];
238 m_sigma += (cnt0 > 0) + (cnt1 > 0);
240 start += cnt0 + cnt1;
243 mask_old += mask_new;
245 if ((tree_pos & 0x3FULL) != 0)
247 tree_out_buf.write((
char *)&tree_word,
sizeof(tree_word));
250 tree_out_buf.
close();
298 *
this = std::move(tmp);
310 m_tree = std::move(wt.m_tree);
354 offset += (node_size - ones_before_end);
355 node_size = ones_before_end;
361 node_size = (node_size - ones_before_end);
362 i = (i - ones_before_i);
396 offset += (node_size - ones_before_end);
397 node_size = ones_before_end;
402 node_size = (node_size - ones_before_end);
403 i = (i - ones_before_i);
432 offset += (node_size - ones_before_end);
433 node_size = ones_before_end;
439 node_size = (node_size - ones_before_end);
440 i = (i - ones_before_i);
444 return std::make_pair(i, c);
458 assert(1 <= i and i <=
rank(
size(), c));
465 m_path_off[0] = m_path_rank_off[0] = 0;
467 for (uint32_t k = 0; k <
m_max_level and node_size; ++k)
470 m_path_rank_off[k] = ones_before_o;
474 offset += (node_size - ones_before_end);
475 node_size = ones_before_end;
479 node_size = (node_size - ones_before_end);
482 m_path_off[k + 1] = offset;
485 if (0ULL == node_size or node_size < i)
488 +
"): c does not occur i times in the WT");
494 offset = m_path_off[k - 1];
495 size_type ones_before_o = m_path_rank_off[k - 1];
533 std::vector<value_type> & cs,
534 std::vector<size_type> & rank_c_i,
535 std::vector<size_type> & rank_c_j)
const
537 assert(i <= j and j <=
size());
547 rank_c_i[0] = res.first;
548 rank_c_j[0] = res.first + 1;
553 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0, 0,
m_size, 0);
569 template <
class t_ret_type = std::tuple<
size_type,
size_type,
size_type>>
572 assert(i <= j and j <=
size());
575 return t_ret_type{0, j - i, 0};
590 offset += (node_size - ones_before_end);
591 node_size = ones_before_end;
592 smaller += j - i - ones_before_j + ones_before_i;
598 node_size -= ones_before_end;
599 greater += ones_before_j - ones_before_i;
606 return t_ret_type{i, smaller, greater};
619 template <
class t_ret_type = std::tuple<
size_type,
size_type>>
625 return t_ret_type{0, i};
638 offset += (node_size - ones_before_end);
639 node_size = ones_before_end;
640 result += i - ones_before_i;
645 node_size = (node_size - ones_before_end);
651 return t_ret_type{i, result};
663 std::pair<size_type, std::vector<std::pair<value_type, size_type>>>
667 std::vector<size_type> ones_before_os(
m_max_level + 1);
675 _range_search_2d(lb, rb, vlb, vrb, 0, 0,
m_size, offsets, ones_before_os, 0, point_vec, report, cnt_answers);
676 return make_pair(cnt_answers, point_vec);
686 std::vector<size_type> & offsets,
687 std::vector<size_type> & ones_before_os,
699 for (
size_type j = lb + 1; j <= rb + 1; ++j)
706 size_type ones_before_o = ones_before_os[k - 1];
717 point_vec.emplace_back(i - 1,
path);
720 cnt_answers += rb - lb + 1;
729 ones_before_os[level] = ones_before_o;
733 size_type zeros_before_o = offset - ones_before_o;
734 size_type zeros_before_lb = offset + lb - ones_before_lb;
735 size_type zeros_before_rb = offset + rb + 1 - ones_before_rb;
736 size_type zeros_before_end = offset + node_size - ones_before_end;
737 if (vlb < mid and mid)
739 size_type nlb = zeros_before_lb - zeros_before_o;
740 size_type nrb = zeros_before_rb - zeros_before_o;
741 offsets[level + 1] = offset +
m_size;
746 std::min(vrb, mid - 1),
749 zeros_before_end - zeros_before_o,
759 size_type nlb = ones_before_lb - ones_before_o;
760 size_type nrb = ones_before_rb - ones_before_o;
761 offsets[level + 1] = offset +
m_size + (zeros_before_end - zeros_before_o);
769 ones_before_end - ones_before_o,
798 written_bytes +=
m_tree.serialize(out, child,
"tree");
799 written_bytes +=
m_tree_rank.serialize(out, child,
"tree_rank");
800 written_bytes +=
m_tree_select1.serialize(out, child,
"tree_select_1");
801 written_bytes +=
m_tree_select0.serialize(out, child,
"tree_select_0");
804 return written_bytes;
819 template <
typename archive_t>
831 template <
typename archive_t>
857 return !(*
this == other);
936 bool bit = *(
begin(vv) + i);
937 i = std::get<1>(rs[bit]);
971 return expand(std::move(v_right));
986 v_left.
size = v.size - ones;
987 v_left.
level = v.level + 1;
988 v_left.
sym = v.sym << 1;
992 v.level = v.level + 1;
993 v.sym = (v.sym << 1) | 1;
995 return {{std::move(v_left), v}};
1010 auto ranges_copy = ranges;
1011 return expand(v, std::move(ranges_copy));
1029 for (
auto & r : ranges)
1033 auto left_size = (r[1] - r[0] + 1) - right_size;
1035 auto right_sp = sp_rank - v_sp_rank;
1036 auto left_sp = r[0] - right_sp;
1038 r = {{left_sp, left_sp + left_size - 1}};
1039 res[i++] = {{right_sp, right_sp + right_size - 1}};
1041 return {{ranges, std::move(res)}};
1059 auto left_size = (r[1] - r[0] + 1) - right_size;
1061 auto right_sp = sp_rank - v_sp_rank;
1062 auto left_sp = r[0] - right_sp;
1064 return {{{{left_sp, left_sp + left_size - 1}}, {{right_sp, right_sp + right_size - 1}}}};
1075 auto begin(node_type
const & v)
const ->
decltype(
m_tree.begin() + v.offset)
1077 return m_tree.begin() + v.offset;
1081 auto end(node_type
const & v)
const ->
decltype(
m_tree.begin() + v.offset + v.size)
1083 return m_tree.begin() + v.offset + v.size;
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
void close(bool remove_file=false)
Close the int_vector_buffer.
A generic vector class for integers of width .
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
int_vector_size_type size_type
int_vector_trait< t_width >::value_type value_type
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
void close()
Close the stream.
Generic iterator for a random access container.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
bool operator!=(wt_int const &other) const noexcept
Inequality operator.
bool operator==(wt_int const &other) const noexcept
Equality operator.
select_0_type m_tree_select0
uint32_t const & max_level
Maximal level of the wavelet tree.
std::array< node_type, 2 > expand(node_type &&v) const
Returns the two child nodes of an inner node.
node_type root() const
Return the root node.
wt_int & operator=(wt_int &&wt)
Assignment move operator.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
wt_int(wt_int const &wt)
Copy constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
size_type size() const
Returns the size of the original vector.
std::pair< size_type, point_vec_type > r2d_res_type
bool is_leaf(node_type const &v) const
Checks if the node is a leaf node.
auto seq(node_type const &v) const -> random_access_container< std::function< value_type(size_type)> >
Random access container to sequence of node v.
t_select_zero select_0_type
std::pair< value_type, size_type > point_type
wt_int()=default
Default constructor.
bit_vector_type const & tree
A concatenation of all bit vectors of the wavelet tree.
int_vector ::value_type value_type
std::pair< size_type, std::vector< std::pair< value_type, size_type > > > range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report=true) const
range_search_2d searches points in the index interval [lb..rb] and value interval [vlb....
std::array< range_vec_type, 2 > expand(node_type const &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
t_bitvector bit_vector_type
auto bit_vec(node_type const &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
std::array< range_vec_type, 2 > expand(node_type const &v, range_vec_type const &ranges) const
Returns for each range its left and right child ranges.
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
std::array< range_type, 2 > expand(node_type const &v, range_type const &r) const
Returns for a range its left and right child ranges.
bool empty(node_type const &v) const
Indicates if node v is empty.
wt_int & operator=(wt_int const &wt)
Assignment operator.
random_access_const_iterator< wt_int > const_iterator
wt_int(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
int_alphabet_tag alphabet_category
size_type const & sigma
Effective alphabet size of the wavelet tree.
wt_int(wt_int &&wt)
Move constructor.
std::vector< point_type > point_vec_type
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
int_vector ::size_type size_type
value_type sym(node_type const &v) const
Returns the symbol of leaf node v.
void _range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, size_type level, size_type ilb, size_type node_size, std::vector< size_type > &offsets, std::vector< size_type > &ones_before_os, size_type path, point_vec_type &point_vec, bool report, size_type &cnt_answers) const
std::array< node_type, 2 > expand(node_type const &v) const
Returns the two child nodes of an inner node.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
const_iterator end() const
Returns a const_iterator to the element after the last element.
auto size(node_type const &v) const -> decltype(v.size)
Return the size of node v.
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
t_bitvector::difference_type difference_type
const_iterator begin() const
Returns a const_iterator to the first element.
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
void load(std::istream &in)
Loads the data structure from the given istream.
t_ret_type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
t_ret_type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
select_1_type m_tree_select1
bool empty() const
Returns whether the wavelet tree contains no data.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
std::string to_string(T const &t, int w=1)
Namespace for the succinct data structure library.
int remove(std::string const &)
Remove a file.
std::string tmp_file(cache_config const &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
std::string ram_file_name(std::string const &file)
Returns the corresponding RAM-file name for file.
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
bool load_from_file(T &v, std::string const &file)
Load sdsl-object v from a file.
void read_member(T &t, std::istream &in)
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
std::array< int_vector<>::size_type, 2 > range_type
std::vector< range_type > range_vec_type
Contains declarations and definitions of data structure concepts.
sfstream.hpp contains a two stream class which can be used to read/write from/to files or strings.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Represents a node in the wavelet tree.
node_type(size_type o=0, size_type sz=0, size_type l=0, value_type sy=0)
node_type(node_type &&)=default
bool operator<(node_type const &v) const
bool operator>(node_type const &v) const
node_type(node_type const &)=default
node_type & operator=(node_type &&)=default
bool operator==(node_type const &v) const
node_type & operator=(node_type const &)=default
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.