SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
csa_sampling_strategy.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_CSA_SAMPLING_STRATEGY
10#define INCLUDED_CSA_SAMPLING_STRATEGY
11
12/*
13 * Text = ABCDEFABCDEF$
14 * 0123456789012
15 * sa_sample_dens = 2
16 * *1 SA *2
17 * * 12 * $
18 * 06 * ABCDEF$
19 * * 00 * ABCDEFABCDEF$
20 * 07 BCDEF$
21 * * 01 BCDEFABCDEF$
22 * 08 * CDEF$
23 * * 02 * CDEFABCDEF$
24 * 09 DEF$
25 * * 03 DEFABCDEF$
26 * 10 * EF$
27 * * 04 * EFABCDEF$
28 * 11 F$
29 * * 05 FABCDEF$
30 *
31 * The first sampling (*1) is called suffix order sampling. It has the advantage, that
32 * we don't need to store a bitvector, which marks the sampled suffixes, since a suffix
33 * at index \‍(i\‍) in the suffix array is marked if \‍( 0 \equiv i \mod sa_sample_dens \‍).
34 *
35 * The second sampling (*2) is called text order sampling. It is also called regular in [1].
36 *
37 * [1] P.Ferragina, J. Siren, R. Venturini: Distribution-Aware Compressed Full-Text Indexes, ESA 2011
38 */
39
40#include <iosfwd>
41#include <set>
42#include <stdint.h>
43#include <string>
44#include <tuple>
45#include <type_traits>
46#include <utility>
47
48#include <sdsl/bits.hpp>
49#include <sdsl/cereal.hpp>
50#include <sdsl/config.hpp>
51#include <sdsl/construct.hpp>
54#include <sdsl/int_vector.hpp>
57#include <sdsl/io.hpp>
59#include <sdsl/ram_fs.hpp>
61#include <sdsl/rrr_vector.hpp>
62#include <sdsl/sd_vector.hpp>
65#include <sdsl/util.hpp>
66#include <sdsl/wt_int.hpp>
67
68namespace sdsl
69{
70
71template <class t_csa, uint8_t t_width = 0>
72class _sa_order_sampling : public int_vector<t_width>
73{
74public:
76 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
77 typedef typename base_type::value_type value_type; //
78 enum
79 {
80 sample_dens = t_csa::sa_sample_dens
81 };
82 enum
83 {
84 text_order = false
85 };
87
91
93 /*
94 * \param cconfig Cache configuration (SA is expected to be cached.).
95 * \param csa Pointer to the corresponding CSA. Not used in this class.
96 * \par Time complexity
97 * Linear in the size of the suffix array.
98 */
99 _sa_order_sampling(cache_config const & cconfig, SDSL_UNUSED t_csa const * csa = nullptr)
100 {
102 size_type n = sa_buf.size();
103 this->width(bits::hi(n) + 1);
104 this->resize((n + sample_dens - 1) / sample_dens);
105
106 for (size_type i = 0, cnt_mod = sample_dens, cnt_sum = 0; i < n; ++i, ++cnt_mod)
107 {
108 size_type sa = sa_buf[i];
109 if (sample_dens == cnt_mod)
110 {
111 cnt_mod = 0;
112 base_type::operator[](cnt_sum++) = sa;
113 }
114 }
115 }
116
118 inline bool is_sampled(size_type i) const
119 {
120 return 0 == (i % sample_dens);
121 }
122
125 {
127 }
128};
129
130template <uint8_t t_width = 0>
132{
133 template <class t_csa>
136};
137
138template <class t_csa, class t_bv = bit_vector, class t_rank = typename t_bv::rank_1_type, uint8_t t_width = 0>
139class _text_order_sampling : public int_vector<t_width>
140{
141private:
142 t_bv m_marked;
143 t_rank m_rank_marked;
144
145public:
147 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
149 typedef t_bv bv_type;
150 enum
151 {
152 sample_dens = t_csa::sa_sample_dens
153 };
154 enum
155 {
156 text_order = true
157 };
159
160 bv_type const & marked = m_marked;
161 t_rank const & rank_marked = m_rank_marked;
162
166
168 /*
169 * \param cconfig Cache configuration (SA is expected to be cached.).
170 * \param csa Pointer to the corresponding CSA. Not used in this class.
171 * \par Time complexity
172 * Linear in the size of the suffix array.
173 */
174 _text_order_sampling(cache_config const & cconfig, SDSL_UNUSED t_csa const * csa = nullptr)
175 {
177 size_type n = sa_buf.size();
178 bit_vector marked(n, 0); // temporary bitvector for the marked text positions
179 this->width(bits::hi(n / sample_dens) + 1);
180 this->resize((n + sample_dens - 1) / sample_dens);
181
182 for (size_type i = 0, sa_cnt = 0; i < n; ++i)
183 {
184 size_type sa = sa_buf[i];
185 if (0 == (sa % sample_dens))
186 {
187 marked[i] = 1;
188 base_type::operator[](sa_cnt++) = sa / sample_dens;
189 }
190 }
191 m_marked = std::move(t_bv(marked));
192 util::init_support(m_rank_marked, &m_marked);
193 }
194
197 {
198 m_marked = st.m_marked;
199 m_rank_marked = st.m_rank_marked;
200 m_rank_marked.set_vector(&m_marked);
201 }
202
204 inline bool is_sampled(size_type i) const
205 {
206 return m_marked[i];
207 }
208
211 {
212 return base_type::operator[](m_rank_marked(i)) * sample_dens;
213 }
214
216 {
217 return base_type::operator[](i);
218 }
219
222 {
223 if (this != &st)
224 {
226 m_marked = st.m_marked;
227 m_rank_marked = st.m_rank_marked;
228 m_rank_marked.set_vector(&m_marked);
229 }
230 return *this;
231 }
232
235 {
236 base_type::swap(st);
237 m_marked.swap(st.m_marked);
238 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
239 }
240
241 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
242 {
243 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
244 size_type written_bytes = 0;
245 written_bytes += base_type::serialize(out, child, "samples");
246 written_bytes += m_marked.serialize(out, child, "marked");
247 written_bytes += m_rank_marked.serialize(out, child, "rank_marked");
248 structure_tree::add_size(child, written_bytes);
249 return written_bytes;
250 }
251
252 void load(std::istream & in)
253 {
254 base_type::load(in);
255 m_marked.load(in);
256 m_rank_marked.load(in);
257 m_rank_marked.set_vector(&m_marked);
258 }
259
260 template <typename archive_t>
261 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
262 {
264 ar(CEREAL_NVP(m_marked));
265 ar(CEREAL_NVP(m_rank_marked));
266 }
267
268 template <typename archive_t>
269 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
270 {
272 ar(CEREAL_NVP(m_marked));
273 ar(CEREAL_NVP(m_rank_marked));
274 m_rank_marked.set_vector(&m_marked);
275 }
276};
277
278template <class t_bit_vec = sd_vector<>, class t_rank_sup = typename t_bit_vec::rank_1_type, uint8_t t_width = 0>
285
286template <class t_csa,
287 class t_bv_sa = sd_vector<>,
288 class t_bv_isa = sd_vector<>,
289 class t_rank_sa = typename t_bv_sa::rank_1_type,
290 class t_select_isa = typename t_bv_isa::select_1_type>
292{
293private:
294 t_bv_sa m_marked_sa;
295 t_rank_sa m_rank_marked_sa;
296 t_bv_isa m_marked_isa;
297 t_select_isa m_select_marked_isa;
298 wt_int<rrr_vector<63>> m_inv_perm;
299
300public:
301 typedef typename bit_vector::size_type size_type; // make typedefs of base_type visible
303 typedef t_bv_sa bv_sa_type;
304 enum
305 {
306 sample_dens = t_csa::sa_sample_dens
307 };
308 enum
309 {
310 text_order = true
311 };
313
314 t_bv_sa const & marked_sa = m_marked_sa;
315 t_rank_sa const & rank_marked_sa = m_rank_marked_sa;
316 t_bv_isa const & marked_isa = m_marked_isa;
317 t_select_isa const & select_marked_isa = m_select_marked_isa;
318
322
324 /*
325 * \param cconfig Cache configuration (SA is expected to be cached.).
326 * \param csa Pointer to the corresponding CSA. Not used in this class.
327 * \par Time complexity
328 * Linear in the size of the suffix array.
329 */
330 _fuzzy_sa_sampling(cache_config & cconfig, SDSL_UNUSED t_csa const * csa = nullptr)
331 {
332 {
333 // (2) check, if the suffix array is cached
334 if (!cache_file_exists(conf::KEY_ISA, cconfig))
335 {
336 auto event = memory_monitor::event("ISA");
337 construct_isa(cconfig);
338 }
340 }
341 {
343 size_type n = isa_buf.size();
344 bit_vector marked_isa(n, 0); // temporary bitvector for marked ISA positions
345 bit_vector marked_sa(n, 0); // temporary bitvector for marked SA positions
346 int_vector<> inv_perm((n + sample_dens - 1) / sample_dens, 0, bits::hi(n) + 1);
347 size_type cnt = 0;
348
349 uint64_t min_prev_val = 0;
350 for (size_type i = 0; i < n; i += sample_dens)
351 {
352 size_type pos_min = i;
353 size_type pos_cnd = isa_buf[i] >= min_prev_val ? i : n;
354 for (size_type j = i + 1; j < i + sample_dens and j < n; ++j)
355 {
356 if (isa_buf[j] < isa_buf[pos_min])
357 pos_min = j;
358 if (isa_buf[j] >= min_prev_val)
359 {
360 if (pos_cnd == n)
361 {
362 pos_cnd = j;
363 }
364 else if (isa_buf[j] < isa_buf[pos_cnd])
365 {
366 pos_cnd = j;
367 }
368 }
369 }
370 if (pos_cnd == n)
371 { // increasing sequence can not be extended
372 pos_cnd = pos_min;
373 }
374 min_prev_val = isa_buf[pos_cnd];
375 marked_isa[pos_cnd] = 1;
376 inv_perm[cnt++] = min_prev_val;
377 marked_sa[min_prev_val] = 1;
378 }
379 m_marked_isa = std::move(t_bv_isa(marked_isa));
380 util::init_support(m_select_marked_isa, &m_marked_isa);
381 {
383 for (size_type i = 0; i < inv_perm.size(); ++i)
384 {
385 inv_perm[i] = rank_marked_sa(inv_perm[i]);
386 }
387 }
388 util::bit_compress(inv_perm);
389
390 m_marked_sa = std::move(t_bv_sa(marked_sa));
391 util::init_support(m_rank_marked_sa, &m_marked_sa);
392
393 std::string tmp_key =
394 "fuzzy_isa_samples_" + util::to_string(util::pid()) + "_" + util::to_string(util::id());
395 std::string tmp_file_name = cache_file_name(tmp_key, cconfig);
396 store_to_file(inv_perm, tmp_file_name);
397 construct(m_inv_perm, tmp_file_name, 0);
398 sdsl::remove(tmp_file_name);
399 }
400 }
401
404 m_marked_sa(st.m_marked_sa),
405 m_rank_marked_sa(st.m_rank_marked_sa),
406 m_marked_isa(st.m_marked_isa),
407 m_select_marked_isa(st.m_select_marked_isa),
408 m_inv_perm(st.m_inv_perm)
409 {
410 m_rank_marked_sa.set_vector(&m_marked_sa);
411 m_select_marked_isa.set_vector(&m_marked_isa);
412 }
413
416 m_marked_sa(std::move(st.m_marked_sa)),
417 m_rank_marked_sa(std::move(st.m_rank_marked_sa)),
418 m_marked_isa(std::move(st.m_marked_isa)),
419 m_select_marked_isa(std::move(st.m_select_marked_isa)),
420 m_inv_perm(std::move(st.m_inv_perm))
421 {
422 m_rank_marked_sa.set_vector(&m_marked_sa);
423 m_select_marked_isa.set_vector(&m_marked_isa);
424 }
425
427 inline bool is_sampled(size_type i) const
428 {
429 return m_marked_sa[i];
430 }
431
434 {
435 return m_select_marked_isa(m_inv_perm.select(1, m_rank_marked_sa(i)) + 1);
436 }
437
439 inline value_type inv(size_type i) const
440 {
441 return m_inv_perm[i];
442 }
443
445 {
446 return m_inv_perm.size();
447 }
448
451 {
452 if (this != &st)
453 {
454 _fuzzy_sa_sampling tmp(st);
455 *this = std::move(tmp);
456 }
457 return *this;
458 }
459
462 {
463 m_marked_sa = std::move(st.m_marked_sa);
464 m_rank_marked_sa = std::move(st.m_rank_marked_sa);
465 m_marked_isa = std::move(st.m_marked_isa);
466 m_select_marked_isa = std::move(st.m_select_marked_isa);
467 m_inv_perm = std::move(st.m_inv_perm);
468 m_rank_marked_sa.set_vector(&m_marked_sa);
469 m_select_marked_isa.set_vector(&m_marked_isa);
470 return *this;
471 }
472
473 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
474 {
475 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
476 size_type written_bytes = 0;
477 written_bytes += m_marked_sa.serialize(out, child, "marked_sa");
478 written_bytes += m_rank_marked_sa.serialize(out, child, "rank_marked_sa");
479 written_bytes += m_marked_isa.serialize(out, child, "marked_isa");
480 written_bytes += m_select_marked_isa.serialize(out, child, "select_marked_isa");
481 written_bytes += m_inv_perm.serialize(out, child, "inv_perm");
482 structure_tree::add_size(child, written_bytes);
483 return written_bytes;
484 }
485
486 void load(std::istream & in)
487 {
488 m_marked_sa.load(in);
489 m_rank_marked_sa.load(in);
490 m_rank_marked_sa.set_vector(&m_marked_sa);
491 m_marked_isa.load(in);
492 m_select_marked_isa.load(in);
493 m_select_marked_isa.set_vector(&m_marked_isa);
494 m_inv_perm.load(in);
495 }
496
497 template <typename archive_t>
498 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
499 {
500 ar(CEREAL_NVP(m_marked_sa));
501 ar(CEREAL_NVP(m_rank_marked_sa));
502 ar(CEREAL_NVP(m_marked_isa));
503 ar(CEREAL_NVP(m_select_marked_isa));
504 ar(CEREAL_NVP(m_inv_perm));
505 }
506
507 template <typename archive_t>
508 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
509 {
510 ar(CEREAL_NVP(m_marked_sa));
511 ar(CEREAL_NVP(m_rank_marked_sa));
512 m_rank_marked_sa.set_vector(&m_marked_sa);
513 ar(CEREAL_NVP(m_marked_isa));
514 ar(CEREAL_NVP(m_select_marked_isa));
515 m_select_marked_isa.set_vector(&m_marked_isa);
516 ar(CEREAL_NVP(m_inv_perm));
517 }
518
520 bool operator==(_fuzzy_sa_sampling const & other) const noexcept
521 {
522 return (m_marked_sa == other.m_marked_sa) && (m_rank_marked_sa == other.m_rank_marked_sa)
523 && (m_marked_isa == other.m_marked_isa) && (m_select_marked_isa == other.m_select_marked_isa)
524 && (m_inv_perm == other.m_inv_perm);
525 }
526
528 bool operator!=(_fuzzy_sa_sampling const & other) const noexcept
529 {
530 return !(*this == other);
531 }
532};
533template <class t_bv_sa = sd_vector<>,
534 class t_bv_isa = sd_vector<>,
535 class t_rank_sa = typename t_bv_sa::rank_1_type,
536 class t_select_isa = typename t_bv_isa::select_1_type>
543
544/*
545 * Text = ABCDEFABCDEF$
546 * 0123456789012
547 * sa_sample_dens = 4
548 * sa_sample_chars = {B,E}
549 * SA BWT (1)
550 * 12 F * $
551 * 06 F ABCDEF$
552 * 00 $ * ABCDEFABCDEF$
553 * 07 A BCDEF$
554 * 01 A BCDEFABCDEF$
555 * 08 B * CDEF$
556 * 02 B * CDEFABCDEF$
557 * 09 C DEF$
558 * 03 C DEFABCDEF$
559 * 10 D EF$
560 * 04 D * EFABCDEF$
561 * 11 E * F$
562 * 05 E * FABCDEF$
563 *
564 * In this sampling a suffix x=SA[i] is marked if x \‍( 0 \equiv x \mod sa_sample_dens \‍) or
565 * BWT[i] is contained in sa_sample_chars.
566 */
567
568template <class t_csa, class t_bv = bit_vector, class t_rank = typename t_bv::rank_1_type, uint8_t t_width = 0>
569class _bwt_sampling : public int_vector<t_width>
570{
571private:
572 t_bv m_marked;
573 t_rank m_rank_marked;
574
575public:
577 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
579 enum
580 {
581 sample_dens = t_csa::sa_sample_dens
582 };
583 enum
584 {
585 text_order = false
586 };
588
592
594 /*
595 * \param cconfig Cache configuration (BWT,SA, and SAMPLE_CHARS are expected to be cached.).
596 * \param csa Pointer to the corresponding CSA. Not used in this class.
597 * \par Time complexity
598 * Linear in the size of the suffix array.
599 */
600 _bwt_sampling(cache_config const & cconfig, SDSL_UNUSED t_csa const * csa = nullptr)
601 {
605 size_type n = sa_buf.size();
606 bit_vector marked(n, 0); // temporary bitvector for the marked text positions
607 this->width(bits::hi(n) + 1);
608 int_vector<> sample_char;
609 typedef typename t_csa::char_type char_type;
610 std::set<char_type> char_map;
611 if (load_from_cache(sample_char, conf::KEY_SAMPLE_CHAR, cconfig))
612 {
613 for (uint64_t i = 0; i < sample_char.size(); ++i)
614 {
615 char_map.insert((char_type)sample_char[i]);
616 }
617 }
618 size_type sa_cnt = 0;
619 for (size_type i = 0; i < n; ++i)
620 {
621 size_type sa = sa_buf[i];
622 char_type bwt = bwt_buf[i];
623 if (0 == (sa % sample_dens))
624 {
625 marked[i] = 1;
626 ++sa_cnt;
627 }
628 else if (char_map.find(bwt) != char_map.end())
629 {
630 marked[i] = 1;
631 ++sa_cnt;
632 }
633 }
634 this->resize(sa_cnt);
635 sa_cnt = 0;
636 for (size_type i = 0; i < n; ++i)
637 {
638 size_type sa = sa_buf[i];
639 if (marked[i])
640 {
641 base_type::operator[](sa_cnt++) = sa;
642 }
643 }
644 m_marked = std::move(marked);
645 util::init_support(m_rank_marked, &m_marked);
646 }
647
650 {
651 m_marked = st.m_marked;
652 m_rank_marked = st.m_rank_marked;
653 m_rank_marked.set_vector(&m_marked);
654 }
655
657 inline bool is_sampled(size_type i) const
658 {
659 return m_marked[i];
660 }
661
664 {
665 return base_type::operator[](m_rank_marked(i)) * sample_dens;
666 }
667
670 {
671 if (this != &st)
672 {
674 m_marked = st.m_marked;
675 m_rank_marked = st.m_rank_marked;
676 m_rank_marked.set_vector(&m_marked);
677 }
678 return *this;
679 }
680
683 {
684 base_type::swap(st);
685 m_marked.swap(st.m_marked);
686 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
687 }
688
689 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
690 {
691 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
692 size_type written_bytes = 0;
693 written_bytes += base_type::serialize(out, child, "samples");
694 written_bytes += m_marked.serialize(out, child, "marked");
695 written_bytes += m_rank_marked.serialize(out, child, "rank_marked");
696 structure_tree::add_size(child, written_bytes);
697 return written_bytes;
698 }
699
700 void load(std::istream & in)
701 {
702 base_type::load(in);
703 m_marked.load(in);
704 m_rank_marked.load(in);
705 m_rank_marked.set_vector(&m_marked);
706 }
707
708 template <typename archive_t>
709 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
710 {
712 ar(CEREAL_NVP(m_marked));
713 ar(CEREAL_NVP(m_rank_marked));
714 }
715
716 template <typename archive_t>
717 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
718 {
720 ar(CEREAL_NVP(m_marked));
721 ar(CEREAL_NVP(m_rank_marked));
722 m_rank_marked.set_vector(&m_marked);
723 }
724};
725
726template <class t_bit_vec = bit_vector, class t_rank_sup = typename t_bit_vec::rank_1_type, uint8_t t_width = 0>
733
734template <class t_csa, uint8_t t_width = 0>
735class _isa_sampling : public int_vector<t_width>
736{
737public:
739 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
741 typedef typename t_csa::sa_sample_type sa_type; // sa sample type
742 enum
743 {
744 sample_dens = t_csa::isa_sample_dens
745 };
747
751
753 /*
754 * \param cconfig Cache configuration (SA is expected to be cached.).
755 * \param sa_sample Pointer to the corresponding SA sampling. Not used in this class.
756 * \par Time complexity
757 * Linear in the size of the suffix array.
758 */
759 _isa_sampling(cache_config const & cconfig, SDSL_UNUSED sa_type const * sa_sample = nullptr)
760 {
762 size_type n = sa_buf.size();
763 if (n >= 1)
764 { // so n+t_csa::isa_sample_dens >= 2
765 this->width(bits::hi(n) + 1);
766 this->resize((n - 1) / sample_dens + 1);
767 }
768 for (size_type i = 0; i < this->size(); ++i)
769 base_type::operator[](i) = 0;
770
771 for (size_type i = 0; i < n; ++i)
772 {
773 size_type sa = sa_buf[i];
774 if ((sa % sample_dens) == 0)
775 {
777 }
778 }
779 }
780
783 {
785 }
786
788 inline std::tuple<value_type, size_type> sample_leq(size_type i) const
789 {
790 size_type ci = i / sample_dens;
791 return std::make_tuple(base_type::operator[](ci), ci * sample_dens);
792 }
793
795 inline std::tuple<value_type, size_type> sample_qeq(size_type i) const
796 {
797 size_type ci = (i / sample_dens + 1) % this->size();
798 return std::make_tuple(base_type::operator[](ci), ci * sample_dens);
799 }
800
802 void load(std::istream & in, SDSL_UNUSED sa_type const * sa_sample = nullptr)
803 {
804 base_type::load(in);
805 }
806
807 template <typename archive_t>
808 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
809 {
811 }
812
813 template <typename archive_t>
814 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
815 {
817 }
818
820 {}
821};
822
823template <uint8_t t_width = 0>
825{
826 template <class t_csa>
829};
830
831template <class t_csa, class t_inv_perm, class t_sel>
833{
834 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
835 "ISA sampling requires: sa_sample_dens == isa_sample_dens");
836
837public:
840 typedef typename t_csa::sa_sample_type sa_type; // sa sample type
841 typedef typename sa_type::bv_type bv_type; // bitvector type used to mark SA samples
842 enum
843 {
844 sample_dens = t_csa::isa_sample_dens
845 };
847
848private:
849 t_sel m_select_marked;
850 t_inv_perm m_inv_perm;
851
852public:
853 t_sel const & select_marked = m_select_marked;
854
858
860 /*
861 * \param cconfig Cache configuration. (Not used in this class)
862 * \param sa_sample Pointer to the corresponding SA sampling..
863 * \par Time complexity
864 * Linear in the size of the suffix array.
865 */
867 const typename std::enable_if<sa_type::text_order, sa_type *>::type sa_sample)
868 {
869 // and initialize the select support on bitvector marked
870 m_select_marked = t_sel(&(sa_sample->marked));
871 int_vector<> const * perm = (int_vector<> const *)sa_sample;
872 m_inv_perm = t_inv_perm(perm);
873 m_inv_perm.set_vector(perm);
874 }
875
878 {
879 m_inv_perm = st.m_inv_perm;
880 m_select_marked = st.m_select_marked;
881 }
882
885 {
886 return m_select_marked(m_inv_perm[i / sample_dens] + 1);
887 }
888
890 inline std::tuple<value_type, size_type> sample_leq(size_type i) const
891 {
892 size_type ci = i / sample_dens;
893 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci * sample_dens);
894 }
895
897 inline std::tuple<value_type, size_type> sample_qeq(size_type i) const
898 {
899 size_type ci = (i / sample_dens + 1) % m_inv_perm.size();
900 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci * sample_dens);
901 }
902
905 {
906 if (this != &st)
907 {
908 m_inv_perm = st.m_inv_perm;
909 m_select_marked = st.m_select_marked;
910 }
911 return *this;
912 }
913
916 {
917 if (this != &st)
918 {
919 m_inv_perm.swap(st.m_inv_perm);
920 m_select_marked.swap(st.m_select_marked);
921 }
922 }
923
924 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
925 {
926 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
927 size_type written_bytes = 0;
928 written_bytes += m_inv_perm.serialize(out, child, "inv_perm");
929 written_bytes += m_select_marked.serialize(out, child, "select_marked");
930 structure_tree::add_size(child, written_bytes);
931 return written_bytes;
932 }
933
935 void load(std::istream & in, sa_type const * sa_sample = nullptr)
936 {
937 m_inv_perm.load(in);
938 m_select_marked.load(in);
939 set_vector(sa_sample);
940 }
941
942 template <typename archive_t>
943 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
944 {
945 ar(CEREAL_NVP(m_inv_perm));
946 ar(CEREAL_NVP(m_select_marked));
947 }
948
949 template <typename archive_t>
950 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar, sa_type const * sa_sample = nullptr)
951 {
952 ar(CEREAL_NVP(m_inv_perm));
953 ar(CEREAL_NVP(m_select_marked));
954 set_vector(sa_sample);
955 }
956
958 bool operator==(_text_order_isa_sampling_support const & other) const noexcept
959 {
960 return (m_inv_perm == other.m_inv_perm) && (m_select_marked == other.m_select_marked);
961 }
962
964 bool operator!=(_text_order_isa_sampling_support const & other) const noexcept
965 {
966 return !(*this == other);
967 }
968
969 void set_vector(sa_type const * sa_sample = nullptr)
970 {
971 if (sa_sample == nullptr)
972 {
973 m_select_marked.set_vector(nullptr);
974 m_inv_perm.set_vector(nullptr);
975 }
976 else
977 {
978 m_select_marked.set_vector(&(sa_sample->marked));
979 m_inv_perm.set_vector((int_vector<> const *)sa_sample);
980 }
981 }
982};
983
984template <class t_inv_perm = inv_perm_support<8>, class t_sel = void>
986{
987 template <class t_csa>
989 t_csa,
990 t_inv_perm,
991 typename std::conditional<std::is_void<t_sel>::value,
992 typename t_csa::sa_sample_type::bv_type::select_1_type,
993 t_sel>::type>;
995};
996
997template <class t_csa, class t_select_sa>
999{
1000 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
1001 "ISA sampling requires: sa_sample_dens==isa_sample_dens");
1002
1003public:
1006 typedef typename t_csa::sa_sample_type sa_type; // sa sample type
1007 enum
1008 {
1009 sample_dens = t_csa::isa_sample_dens
1012
1013private:
1014 sa_type const * m_sa_p = nullptr; // pointer to sa_sample_strategy
1015 t_select_sa m_select_marked_sa;
1016
1017public:
1021
1023 /*
1024 * \param cconfig Cache configuration. (Not used in this class)
1025 * \param sa_sample Pointer to the corresponding SA sampling..
1026 * \par Time complexity
1027 * Linear in the size of the suffix array.
1028 */
1029 _fuzzy_isa_sampling_support(SDSL_UNUSED cache_config const & cconfig, sa_type const * sa_sample) : m_sa_p(sa_sample)
1030 {
1031 util::init_support(m_select_marked_sa, &(sa_sample->marked_sa));
1032 }
1033
1035 _fuzzy_isa_sampling_support(_fuzzy_isa_sampling_support const & st) : m_select_marked_sa(st.m_select_marked_sa)
1036 {
1037 set_vector(st.m_sa_p);
1038 }
1039
1042 {
1043 return m_sa_p->inv(i);
1044 }
1045
1047 inline std::tuple<value_type, size_type> sample_leq(size_type i) const
1048 {
1049 size_type ci = i / sample_dens;
1050 size_type j = m_sa_p->select_marked_isa(ci + 1);
1051 if (j > i)
1052 {
1053 if (ci > 0)
1054 {
1055 ci = ci - 1;
1056 }
1057 else
1058 {
1059 ci = m_sa_p->size() - 1;
1060 }
1061 j = m_sa_p->select_marked_isa(ci + 1);
1062 }
1063 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
1064 }
1065
1067 inline std::tuple<value_type, size_type> sample_qeq(size_type i) const
1068 {
1069 size_type ci = i / sample_dens;
1070 size_type j = m_sa_p->select_marked_isa(ci + 1);
1071 if (j < i)
1072 {
1073 if (ci < m_sa_p->size() - 1)
1074 {
1075 ci = ci + 1;
1076 }
1077 else
1078 {
1079 ci = 0;
1080 }
1081 j = m_sa_p->select_marked_isa(ci + 1);
1082 }
1083 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
1084 }
1085
1088 {
1089 if (this != &st)
1090 {
1091 m_select_marked_sa = st.m_select_marked_sa;
1092 set_vector(st.m_sa_p);
1093 }
1094 return *this;
1095 }
1096
1099 {
1100 m_select_marked_sa.swap(st.m_select_marked_sa);
1101 }
1102
1103 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
1104 {
1105 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
1106 size_type written_bytes = 0;
1107 written_bytes += m_select_marked_sa.serialize(out, child, "select_marked_sa");
1108 structure_tree::add_size(child, written_bytes);
1109 return written_bytes;
1110 }
1111
1113 void load(std::istream & in, sa_type const * sa_sample = nullptr)
1114 {
1115 m_select_marked_sa.load(in);
1116 set_vector(sa_sample);
1117 }
1118
1119 template <typename archive_t>
1120 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
1121 {
1122 ar(CEREAL_NVP(m_select_marked_sa));
1123 }
1124
1125 template <typename archive_t>
1126 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar, sa_type const * sa_sample = nullptr)
1127 {
1128 ar(CEREAL_NVP(m_select_marked_sa));
1129 set_vector(sa_sample);
1130 }
1131
1133 bool operator==(_fuzzy_isa_sampling_support const & other) const noexcept
1134 {
1135 return (m_select_marked_sa == other.m_select_marked_sa);
1136 }
1137
1139 bool operator!=(_fuzzy_isa_sampling_support const & other) const noexcept
1140 {
1141 return !(*this == other);
1142 }
1143
1144 void set_vector(sa_type const * sa_sample = nullptr)
1145 {
1146 m_sa_p = sa_sample;
1147 if (nullptr != m_sa_p)
1148 {
1149 m_select_marked_sa.set_vector(&(sa_sample->marked_sa));
1150 }
1151 }
1152};
1153
1154template <class t_select_sa = void>
1156{
1157 template <class t_csa>
1158 using type =
1160 typename std::conditional<std::is_void<t_select_sa>::value,
1161 typename t_csa::sa_sample_type::bv_sa_type::select_1_type,
1162 t_select_sa>::type>;
1164};
1165
1166} // namespace sdsl
1167
1168#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
_bwt_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
_bwt_sampling()
Default constructor.
base_type::value_type value_type
_bwt_sampling & operator=(_bwt_sampling const &st)
Assignment operation.
void load(std::istream &in)
_bwt_sampling(_bwt_sampling const &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
int_vector< t_width > base_type
base_type::size_type size_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
void swap(_bwt_sampling &st)
Swap operation.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
bool operator!=(_fuzzy_isa_sampling_support const &other) const noexcept
Inequality operator.
void swap(_fuzzy_isa_sampling_support &st)
Swap operation.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, sa_type const *sa_sample=nullptr)
_fuzzy_isa_sampling_support & operator=(_fuzzy_isa_sampling_support const &st)
Assignment operation.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
bool operator==(_fuzzy_isa_sampling_support const &other) const noexcept
Equality operator.
void set_vector(sa_type const *sa_sample=nullptr)
_fuzzy_isa_sampling_support(_fuzzy_isa_sampling_support const &st)
Copy constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void load(std::istream &in, sa_type const *sa_sample=nullptr)
Load sampling from disk.
_fuzzy_isa_sampling_support(SDSL_UNUSED cache_config const &cconfig, sa_type const *sa_sample)
Constructor.
bool operator==(_fuzzy_sa_sampling const &other) const noexcept
Equality operator.
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling &&st)
Move assignment operation.
_fuzzy_sa_sampling(_fuzzy_sa_sampling &&st)
Move constructor.
value_type inv(size_type i) const
Return the inv permutation at position i (already condensed!!!)
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_fuzzy_sa_sampling(cache_config &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
bit_vector::value_type value_type
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling const &st)
Assignment operation.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_fuzzy_sa_sampling(_fuzzy_sa_sampling const &st)
Copy constructor.
_fuzzy_sa_sampling()
Default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
t_select_isa const & select_marked_isa
bool operator!=(_fuzzy_sa_sampling const &other) const noexcept
Inequality operator.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
void load(std::istream &in, SDSL_UNUSED sa_type const *sa_sample=nullptr)
Load sampling from disk.
_isa_sampling()
Default constructor.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
value_type operator[](size_type i) const
Returns the ISA value at position j, where.
isa_sampling_tag sampling_category
_isa_sampling(cache_config const &cconfig, SDSL_UNUSED sa_type const *sa_sample=nullptr)
Constructor.
base_type::value_type value_type
int_vector< t_width > base_type
void set_vector(SDSL_UNUSED sa_type const *)
base_type::size_type size_type
t_csa::sa_sample_type sa_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
int_vector< t_width > base_type
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
_sa_order_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_sa_order_sampling()
Default constructor.
base_type::value_type value_type
void set_vector(sa_type const *sa_sample=nullptr)
_text_order_isa_sampling_support(SDSL_UNUSED cache_config const &cconfig, const typename std::enable_if< sa_type::text_order, sa_type * >::type sa_sample)
Constructor.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
_text_order_isa_sampling_support & operator=(_text_order_isa_sampling_support const &st)
Assignment operation.
bool operator==(_text_order_isa_sampling_support const &other) const noexcept
Equality operator.
void load(std::istream &in, sa_type const *sa_sample=nullptr)
Load sampling from disk.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_isa_sampling_support(_text_order_isa_sampling_support const &st)
Copy constructor.
bool operator!=(_text_order_isa_sampling_support const &other) const noexcept
Inequality operator.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
void swap(_text_order_isa_sampling_support &st)
Swap operation.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, sa_type const *sa_sample=nullptr)
value_type condensed_sa(size_type i) const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_sampling(cache_config const &cconfig, SDSL_UNUSED t_csa const *csa=nullptr)
Constructor.
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
_text_order_sampling(_text_order_sampling const &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_text_order_sampling & operator=(_text_order_sampling const &st)
Assignment operation.
void swap(_text_order_sampling &st)
Swap operation.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
_text_order_sampling()
Default constructor.
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition io.hpp:36
std::enable_if<!cereal::traits::is_output_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
int_vector_size_type size_type
int_vector_trait< t_width >::value_type value_type
reference operator[](size_type const &i) noexcept
void swap(int_vector &v) noexcept
void load(std::istream &in)
int_vector & operator=(int_vector const &v)
size_type size() const noexcept
The number of elements in the int_vector.
void resize(const size_type size)
std::enable_if<!cereal::traits::is_input_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
iterator insert(const_iterator it, value_type value)
Insert an element before the element that the iterator is pointing to.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
uint8_t width() const noexcept
static mm_event_proxy event(std::string const &name)
A rank structure proposed by Sebastiano Vigna.
A bit vector which compresses very sparse populated bit vectors by.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
Definition wt_int.hpp:62
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition wt_int.hpp:792
size_type size() const
Returns the size of the original vector.
Definition wt_int.hpp:323
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
Definition wt_int.hpp:456
void load(std::istream &in)
Loads the data structure from the given istream.
Definition wt_int.hpp:808
#define SDSL_UNUSED
Definition config.hpp:12
construct.hpp contains methods to construct indexes (compressed suffix arrays and trees).
construct_isa.hpp contains a space and time efficient construction method for the inverse suffix arra...
csa_alphabet_strategy.hpp includes different strategy classes for representing an alphabet of a CSA.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
inv_perm_support.hpp contains a class which adds access to the inverse of a permutation.
io.hpp contains some methods for reading/writing sdsl structures.
memory_tracking.hpp contains two function for allocating and deallocating memory
constexpr char KEY_SAMPLE_CHAR[]
Definition config.hpp:44
constexpr char KEY_SA[]
Definition config.hpp:36
constexpr char KEY_ISA[]
Definition config.hpp:39
uint64_t id()
uint64_t pid()
std::string to_string(T const &t, int w=1)
void bit_compress(t_int_vec &v)
Bit compress the int_vector.
Definition util.hpp:502
Namespace for the succinct data structure library.
bool cache_file_exists(std::string const &key, cache_config const &config)
Checks if the resource specified by the key exists in the cache.
Definition io.hpp:733
int remove(std::string const &)
Remove a file.
Definition ram_fs.hpp:221
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
Definition io.hpp:688
void register_cache_file(std::string const &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition io.hpp:717
bool load_from_cache(T &v, std::string const &key, cache_config const &config, bool add_type_hash=false)
Definition io.hpp:772
constexpr char const * key_bwt()
void construct(t_index &idx, std::string file, uint8_t num_bytes=0, bool move_input=false)
Definition construct.hpp:56
void construct_isa(cache_config &config)
bool store_to_file(T const &v, std::string const &file)
Store a data structure to a file.
Definition io.hpp:874
int_vector ::size_type size(range_type const &r)
Size of a range.
ram_fs.hpp
rank_support_v.hpp contains rank_support_v.
rrr_vector.hpp contains the sdsl::rrr_vector class, and classes which support rank and select for rrr...
sd_vector.hpp contains the sdsl::sd_vector class, and classes which support rank and select for sd_ve...
Contains declarations and definitions of data structure concepts.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
Helper class for construction process.
Definition config.hpp:66
_fuzzy_isa_sampling_support< t_csa, typename std::conditional< std::is_void< t_select_sa >::value, typename t_csa::sa_sample_type::bv_sa_type::select_1_type, t_select_sa >::type > type
_text_order_isa_sampling_support< t_csa, t_inv_perm, typename std::conditional< std::is_void< t_sel >::value, typename t_csa::sa_sample_type::bv_type::select_1_type, t_sel >::type > type
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_int.hpp contains a specialized class for a wavelet tree of a sequence of the numbers.