SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
hyb_vector.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4/*
5 * Copyright (c) 2014 Juha Karkkainen, Dominik Kempa and Simon J. Puglisi
6 *
7 * Permission is hereby granted, free of charge, to any person
8 * obtaining a copy of this software and associated documentation
9 * files (the "Software"), to deal in the Software without
10 * restriction, including without limitation the rights to use,
11 * copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following
14 * conditions:
15 *
16 * The above copyright notice and this permission notice shall be
17 * included in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26 * OTHER DEALINGS IN THE SOFTWARE.
27 *
28 * Simon Gog made the following changes:
29 * - replace std::vectors by int_vectors
30 * - add support for rank0
31 * - added naive implementation of method get_int
32 * - TODO: added a naive implementation of select
33 */
34#ifndef INCLUDED_SDSL_HYB_VECTOR
35#define INCLUDED_SDSL_HYB_VECTOR
36
37#include <algorithm>
38#include <assert.h>
39#include <cstdlib>
40#include <iostream>
41#include <stdint.h>
42#include <stdio.h>
43#include <string>
44
45#include <sdsl/bits.hpp>
46#include <sdsl/cereal.hpp>
47#include <sdsl/int_vector.hpp>
48#include <sdsl/io.hpp>
49#include <sdsl/iterators.hpp>
51#include <sdsl/util.hpp>
52
53namespace sdsl
54{
55
56// Needed for friend declarations.
57template <uint8_t t_b = 1, uint32_t k_sb_rate = 16>
58class rank_support_hyb;
59template <uint8_t t_b = 1, uint32_t k_sb_rate = 16>
60class select_support_hyb;
61
63
71template <uint32_t k_sblock_rate = 16>
73{
74public:
83
84 friend class rank_support_hyb<1, k_sblock_rate>;
85 friend class rank_support_hyb<0, k_sblock_rate>;
86 friend class select_support_hyb<1, k_sblock_rate>;
87 friend class select_support_hyb<0, k_sblock_rate>;
88
89private:
90 static const uint32_t k_block_size;
91 static const uint32_t k_block_bytes;
92 static const uint32_t k_sblock_header_size;
93 static const uint32_t k_sblock_size;
94 static const uint32_t k_hblock_rate;
95
96 size_type m_size = 0; // original bitvector size
97 int_vector<8> m_trunk; // body of encoded blocks
98 int_vector<8> m_sblock_header; // sblock headers
99 int_vector<64> m_hblock_header; // hblock headers
100
101public:
103 hyb_vector() = default;
104 hyb_vector(hyb_vector const & hybrid) = default;
105 hyb_vector(hyb_vector && hybrid) = default;
106 hyb_vector & operator=(hyb_vector const & hybrid) = default;
107 hyb_vector & operator=(hyb_vector && hybrid) = default;
108
111 {
112 m_size = bv.size();
113
114 // Compute the number of blocks.
115 size_type n_blocks = (m_size + k_block_size - 1) / k_block_size;
116 size_type n_sblocks = (n_blocks + k_sblock_rate - 1) / k_sblock_rate;
117 size_type n_hblocks = (n_blocks + k_hblock_rate - 1) / k_hblock_rate;
118
119 size_type trunk_size = 0;
120
121 // runs_lookup[i] = number of runs - 1 in the binary encoding of i.
122 int_vector<8> runs_lookup(65536, 0);
123 runs_lookup[0] = 0;
124 for (uint32_t i = 1; i < 65536; ++i)
125 {
126 runs_lookup[i] = runs_lookup[i >> 1];
127 if (i >= 32768)
128 --runs_lookup[i];
129 if ((i & 1) != ((i >> 1) & 1))
130 ++runs_lookup[i];
131 }
132
133 // Compute optimal encoding for each block.
134 uint64_t const * bv_ptr = bv.data();
135 for (size_type block_id = 0; block_id < n_blocks; ++block_id)
136 {
137 size_type block_beg = block_id * k_block_size;
138 size_type block_end = block_beg + k_block_size;
139
140 uint32_t ones = 0;
141 uint32_t runs = 0;
142
143 if (block_end <= m_size)
144 {
145 // Count the number of ones, fast.
146 uint64_t const * ptr64 = bv_ptr;
147 for (uint8_t i = 0; i < 4; ++i)
148 ones += bits::cnt(*ptr64++);
149
150 // Count the number of runs, fast.
151 ptr64 = bv_ptr;
152 for (uint8_t i = 0; i < 4; ++i)
153 {
154 // Count changes of bits inside 16-bit words of *ptr64.
155 for (uint8_t j = 0; j < 4; ++j)
156 runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff];
157
158 // Count changes of bits between 16-bit words of *ptr64.
159 for (uint8_t j = 0; j < 3; ++j)
160 runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * j + 16)) & 1));
161 ++ptr64;
162 }
163
164 // Count changes of bits between 64-bit words.
165 ptr64 = bv_ptr;
166 for (uint8_t i = 0; i < 3; ++i)
167 {
168 runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1));
169 ++ptr64;
170 }
171 ++runs;
172 }
173 else
174 {
175 // Count number of ones and runs, slow.
176 uint8_t prevbit = 2;
177 for (size_type i = block_beg; i < block_end; ++i)
178 {
179 uint8_t bit = (i < m_size ? bv[i] : 0);
180 if (bit == 1)
181 ++ones;
182 if (bit != prevbit)
183 ++runs;
184 prevbit = bit;
185 }
186 }
187
188 // Choose best encoding.
189 uint32_t minority_enc_size = std::min(ones, k_block_size - ones);
190 uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2);
191 uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size);
192 best_enc_size = std::min(best_enc_size, k_block_bytes);
193
194 // Update the answer.
195 trunk_size += best_enc_size;
196 bv_ptr += k_block_size / 64;
197 }
198
199 // Allocate the memory.
200 m_sblock_header = int_vector<8>(n_sblocks * k_sblock_header_size, 0);
201 m_hblock_header = int_vector<64>(n_hblocks * 2, 0);
202 m_trunk = int_vector<8>(trunk_size, 0);
203
204 // The actual encoding follows.
205 size_type tot_rank = 0; // stores current rank value
206 size_type sblock_ones = 0; // number of 1s inside superblock
207 size_type trunk_ptr = 0;
208
209 // Process blocks left to right.
210 bv_ptr = bv.data();
211 for (size_type block_id = 0; block_id < n_blocks; ++block_id)
212 {
213 size_type block_beg = block_id * k_block_size;
214 size_type block_end = block_beg + k_block_size;
215 size_type sblock_id = block_id / k_sblock_rate;
216 size_type hblock_id = block_id / k_hblock_rate;
217
218 // Update hblock header.
219 if (!(block_id % k_hblock_rate))
220 {
221 m_hblock_header[2 * hblock_id] = trunk_ptr;
222 m_hblock_header[2 * hblock_id + 1] = tot_rank;
223 }
224
225 // Update sblock header.
226 if (!(block_id % k_sblock_rate))
227 {
228 uint32_t * ptr = (uint32_t *)(((uint8_t *)m_sblock_header.data()) + k_sblock_header_size * sblock_id);
229 *ptr++ = trunk_ptr - m_hblock_header[2 * hblock_id];
230 *ptr = tot_rank - m_hblock_header[2 * hblock_id + 1];
231
232 // If the sblock is uniform, flip the bit.
233 if (sblock_id && (!sblock_ones || sblock_ones == k_sblock_size))
234 {
235 ptr = (uint32_t *)(((uint8_t *)m_sblock_header.data()) + k_sblock_header_size * (sblock_id - 1));
236 *ptr |= 0x80000000;
237 }
238
239 // Reset the number of ones in sblock.
240 sblock_ones = 0;
241 }
242
243 uint32_t ones = 0;
244 uint32_t runs = 0;
245
246 // Compute the number of 1-bits and runs inside current block.
247 if (block_end <= m_size)
248 {
249 // Count the number of ones, fast.
250 uint64_t const * ptr64 = bv_ptr;
251 for (uint8_t i = 0; i < 4; ++i)
252 ones += bits::cnt(*ptr64++);
253
254 // Count the number of runs, fast.
255 ptr64 = bv_ptr;
256 for (uint8_t i = 0; i < 4; ++i)
257 {
258 for (uint8_t j = 0; j < 4; ++j)
259 runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff];
260 for (uint8_t j = 0; j < 3; ++j)
261 runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * j + 16)) & 1));
262 ++ptr64;
263 }
264 ptr64 = bv_ptr;
265 for (uint8_t i = 0; i < 3; ++i)
266 {
267 runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1));
268 ++ptr64;
269 }
270 ++runs;
271 }
272 else
273 {
274 // Count number of ones and runs, slow.
275 uint8_t prevbit = 2;
276 for (size_type i = block_beg; i < block_end; ++i)
277 {
278 uint8_t bit = (i < m_size ? bv[i] : 0);
279 if (bit == 1)
280 ++ones;
281 if (bit != prevbit)
282 ++runs;
283 prevbit = bit;
284 }
285 }
286 uint32_t zeros = k_block_size - ones;
287
288 // Store block popcount.
289 uint16_t * header_ptr16 =
290 (uint16_t *)(((uint8_t *)m_sblock_header.data()) + sblock_id * k_sblock_header_size + 8
291 + (block_id % k_sblock_rate) * 2);
292
293 (*header_ptr16) = ones;
294 if (ones == k_block_size)
295 (*header_ptr16) |= 0x200;
296
297 if (0 < ones && ones < k_block_size)
298 { // non uniform block
299 uint32_t minority_enc_size = std::min(ones, zeros);
300 uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2);
301 uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size);
302
303 if (k_block_bytes <= best_enc_size)
304 {
305 // Use plain encoding.
306 (*header_ptr16) |= (k_block_bytes << 10);
307
308 // Copy original 256 bits from bv into trunk.
309 if (block_end <= m_size)
310 {
311 for (uint8_t i = 0; i < 4; ++i)
312 {
313 *((uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr)) = *(bv_ptr + i);
314 trunk_ptr += 8;
315 }
316 }
317 else
318 {
319 for (size_type i = block_beg; i < block_end; i += 64)
320 {
321 uint64_t w = 0;
322 for (size_type j = i; j < std::min(i + 64, block_end); ++j)
323 {
324 uint8_t bit = (j < m_size ? bv[j] : 0);
325 if (bit)
326 w |= ((uint64_t)1 << (j - i));
327 }
328 *((uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr)) = w;
329 trunk_ptr += 8;
330 }
331 }
332 }
333 else
334 {
335 if (runs_enc_size < minority_enc_size)
336 {
337
338 // Use runs encoding.
339 (*header_ptr16) |= (runs_enc_size << 10);
340 (*header_ptr16) |= (bv[block_beg] << 9);
341
342 if (block_end <= m_size)
343 {
344 // Find run ends, fast.
345 uint32_t runid = 0;
346 uint64_t const * ptr64 = bv_ptr;
347
348 uint64_t w = 0;
349 for (uint8_t i = 0; runid < runs_enc_size && i < 4; ++i)
350 {
351 // Check if run end aligns with the end of the 64-bit word.
352 if (i > 0 && (w & 1) != ((*ptr64) & 1))
353 m_trunk[trunk_ptr + runid++] = 64 * i - 1;
354
355 w = (*ptr64++);
356 for (uint8_t j = 0; runid < runs_enc_size && j < 63; ++j)
357 {
358 if ((w & 1) != ((w >> 1) & 1))
359 m_trunk[trunk_ptr + runid++] = j + i * 64;
360 w >>= 1;
361 }
362 }
363 trunk_ptr += runid;
364 }
365 else
366 {
367 // Find run ends, slow.
368 uint8_t prevbit = 2;
369 uint32_t runid = 0;
370
371 for (size_type i = block_beg; runid < runs_enc_size; ++i)
372 {
373 uint8_t bit = (i < m_size ? bv[i] : 0);
374 if (bit != prevbit && i != block_beg)
375 m_trunk[trunk_ptr + runid++] = (i - block_beg - 1);
376 prevbit = bit;
377 }
378 trunk_ptr += runid;
379 }
380 }
381 else
382 {
383 // Use minority encoding.
384 // Update sblock header.
385 (*header_ptr16) |= (minority_enc_size << 10);
386 if (ones < zeros)
387 (*header_ptr16) |= 0x200;
388 uint8_t keybit = (ones < zeros);
389
390 // Find positions of 1-bits, fast.
391 if (block_end <= m_size)
392 {
393 uint64_t const * ptr64 = bv_ptr;
394 for (uint8_t i = 0; i < 4; ++i)
395 {
396 uint64_t w = (*ptr64++);
397 for (uint8_t j = 0; j < 64; ++j)
398 {
399 if ((w & 1) == keybit)
400 m_trunk[trunk_ptr++] = j + 64 * i;
401 w >>= 1;
402 }
403 }
404 }
405 else
406 {
407 for (size_type i = block_beg; i < block_end; ++i)
408 {
409 uint8_t bit = (i < m_size ? bv[i] : 0);
410
411 if (bit == keybit)
412 m_trunk[trunk_ptr++] = i - block_beg;
413 }
414 }
415 }
416 }
417 }
418
419 // Update global rank.
420 tot_rank += ones;
421 sblock_ones += ones;
422 bv_ptr += k_block_size / 64;
423 }
424 }
425
426private:
428 value_type access0(size_type i) const
429 {
430 assert(i > 0);
431 assert(i <= m_size);
432
433 size_type block_id = (i - 1) / k_block_size;
434 size_type sblock_id = block_id / k_sblock_rate;
435 size_type hblock_id = block_id / k_hblock_rate;
436
437 size_type trunk_base = m_hblock_header[2 * hblock_id];
438
439 uint32_t local_i = i - block_id * k_block_size;
440
441 // Read superblock header.
442 uint8_t const * header_ptr8 = ((uint8_t const *)m_sblock_header.data()) + (sblock_id * k_sblock_header_size);
443 uint32_t * header_ptr32 = (uint32_t *)header_ptr8;
444 size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff);
445 header_ptr8 += 8;
446
447 uint16_t * header_ptr16 = (uint16_t *)header_ptr8;
448
449 // Uniform superblock optimization.
450 if ((*header_ptr32) & 0x80000000)
451 return (value_type)((*(header_ptr8 + 1)) & 0x01);
452
453 // Fast forward through preceding blocks in the superblock.
454 for (size_type j = sblock_id * k_sblock_rate; j != block_id; ++j)
455 {
456 trunk_ptr += ((*header_ptr16) >> 10); // Update trunk pointer.
457 ++header_ptr16;
458 }
459
460 uint8_t const * trunk_p = ((uint8_t const *)m_trunk.data()) + trunk_ptr;
461
462 uint32_t encoding_size = ((*header_ptr16) >> 10);
463 uint32_t ones = ((*header_ptr16) & 0x1ff);
464 uint32_t zeros = k_block_size - ones;
465
466 // Number of runs <= 2.
467 uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9);
468 if (!encoding_size)
469 {
470 uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros;
471 uint8_t inside_second_run = (first_run_length < local_i);
472 return (inside_second_run ^ special_bit);
473 }
474
475 // Number of runs > 2.
476 if (encoding_size < k_block_bytes)
477 {
478 if (std::min(ones, zeros) == encoding_size)
479 {
480 // Minority encoding.
481 uint32_t tot = 0;
482 while (tot < encoding_size && *trunk_p < local_i)
483 {
484 ++trunk_p;
485 ++tot;
486 }
487 uint8_t last_was_majority = ((!tot) || (*(trunk_p - 1) != local_i - 1));
488 return (last_was_majority ^ special_bit);
489 }
490
491 // Runs encoding.
492 if (special_bit)
493 {
494 uint32_t j = 0;
495 uint32_t acc = 0;
496
497 int32_t last = -1;
498 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
499 {
500 acc += *trunk_p - last;
501 ++trunk_p;
502 last = *trunk_p;
503 ++trunk_p;
504 j += 2;
505 }
506
507 uint8_t access_i = 0;
508 if (j + 1 >= encoding_size)
509 {
510 if (j < encoding_size)
511 { // j == encoding_size - 1
512 if (local_i <= (uint32_t)(*trunk_p) + 1)
513 access_i = (((int32_t)local_i - last - 1) > 0);
514 else
515 {
516 acc += (int32_t)(*trunk_p) - last;
517 if (ones - acc <= k_block_size - local_i)
518 access_i = 0;
519 else
520 access_i = 1;
521 }
522 }
523 else
524 { // j == encoding_size
525 if ((int32_t)(ones - acc) < (int32_t)local_i - last - 1)
526 access_i = 0;
527 else
528 access_i = (((int32_t)local_i - last - 1) > 0);
529 }
530 }
531 else
532 {
533 if ((*trunk_p) < local_i - 1)
534 access_i = 0;
535 else
536 access_i = (((int32_t)local_i - last - 1) > 0);
537 }
538
539 return access_i;
540 }
541 else
542 {
543 uint32_t j = 0;
544 uint32_t acc = 0;
545 int32_t last = -1;
546
547 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
548 {
549 acc += *trunk_p - last;
550 ++trunk_p;
551 last = *trunk_p;
552 ++trunk_p;
553 j += 2;
554 }
555
556 uint8_t access_i = 0;
557 if (j + 1 >= encoding_size)
558 {
559 if (j < encoding_size)
560 {
561 if (local_i <= (uint32_t)(*trunk_p) + 1)
562 access_i = (((int32_t)local_i - last - 1) == 0);
563 else
564 {
565 acc += (*trunk_p) - last;
566 if (zeros - acc <= k_block_size - local_i)
567 access_i = 1;
568 else
569 access_i = 0;
570 }
571 }
572 else
573 {
574 if ((int32_t)(zeros - acc) < (int32_t)local_i - last - 1)
575 access_i = 1;
576 else
577 access_i = ((local_i - last - 1) == 0);
578 }
579 }
580 else
581 {
582 if ((*trunk_p) < local_i - 1)
583 access_i = 1;
584 else
585 access_i = (((int32_t)local_i - last - 1) == 0);
586 }
587
588 return access_i;
589 }
590 }
591 else
592 {
593 // plain encoding.
594 uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr);
595 uint32_t bit;
596 for (bit = 0; bit + 64 <= local_i; bit += 64)
597 trunk_ptr64++;
598
599 uint8_t access_i = 0;
600 if (bit != local_i)
601 access_i = (((*trunk_ptr64) >> (local_i - bit - 1)) & 1);
602 else
603 access_i = (((*(trunk_ptr64 - 1)) >> 63) & 1);
604 return access_i;
605 }
606 }
607
608public:
610
617 uint64_t get_int(size_type idx, const uint8_t len = 64) const
618 {
619 uint64_t res = 0;
620 for (size_t i = 0; i < len; ++i)
621 {
622 res <<= 1;
623 res |= (*this)[idx + len - 1 - i];
624 }
625 return res;
626 }
627
630 {
631 return access0(i + 1);
632 }
633
636 {
637 return m_size;
638 }
639
641 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
642 {
643 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
644 size_type written_bytes = 0;
645 written_bytes += write_member(m_size, out, child, "size");
646 written_bytes += m_trunk.serialize(out, child, "trunk");
647 written_bytes += m_sblock_header.serialize(out, child, "sblock_header");
648 written_bytes += m_hblock_header.serialize(out, child, "hblock_header");
649 structure_tree::add_size(child, written_bytes);
650 return written_bytes;
651 }
652
654 void load(std::istream & in)
655 {
656 read_member(m_size, in);
657 m_trunk.load(in);
658 m_sblock_header.load(in);
659 m_hblock_header.load(in);
660 }
661
662 template <typename archive_t>
663 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
664 {
665 ar(CEREAL_NVP(m_size));
666 ar(CEREAL_NVP(m_trunk));
667 ar(CEREAL_NVP(m_sblock_header));
668 ar(CEREAL_NVP(m_hblock_header));
669 }
670
671 template <typename archive_t>
672 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
673 {
674 ar(CEREAL_NVP(m_size));
675 ar(CEREAL_NVP(m_trunk));
676 ar(CEREAL_NVP(m_sblock_header));
677 ar(CEREAL_NVP(m_hblock_header));
678 }
679
681 {
682 return iterator(this, 0);
683 }
684
685 iterator end() const
686 {
687 return iterator(this, size());
688 }
689
690 bool operator==(hyb_vector const & v) const
691 {
692 return m_size == v.m_size && m_trunk == v.m_trunk && m_sblock_header == v.m_sblock_header
693 && m_hblock_header == v.m_hblock_header;
694 }
695
696 bool operator!=(hyb_vector const & v) const
697 {
698 return !(*this == v);
699 }
700};
701
702template <uint32_t k_sblock_rate>
703const uint32_t hyb_vector<k_sblock_rate>::k_block_size = 256;
704template <uint32_t k_sblock_rate>
705const uint32_t hyb_vector<k_sblock_rate>::k_block_bytes = 32;
706template <uint32_t k_sblock_rate>
707const uint32_t hyb_vector<k_sblock_rate>::k_sblock_header_size = 8 + 2 * k_sblock_rate;
708template <uint32_t k_sblock_rate>
709const uint32_t hyb_vector<k_sblock_rate>::k_sblock_size = 256 * k_sblock_rate;
710template <uint32_t k_sblock_rate>
711const uint32_t hyb_vector<k_sblock_rate>::k_hblock_rate = (1U << 31) / 256;
712
713template <uint8_t t_bp>
715{
718 {
719 return res;
720 }
721};
722
723template <>
724struct rank_result<0>
725{
728 {
729 return i - res;
730 }
731};
732
734
738// TODO:
739template <uint8_t t_b, uint32_t k_sblock_rate>
741{
742public:
745 enum
746 {
747 bit_pat = t_b
748 };
749 enum
750 {
751 bit_pat_len = (uint8_t)1
752 };
753
754private:
755 bit_vector_type const * m_v;
756
757public:
759 explicit rank_support_hyb(bit_vector_type const * v = nullptr)
760 {
761 set_vector(v);
762 }
763
765 const size_type rank(size_type i) const
766 {
767 assert(m_v != nullptr);
768 assert(i <= m_v->size());
769
770 // Handle easy case.
771 if (i <= 0)
772 return 0;
773
774 size_type block_id = (i - 1) / bit_vector_type::k_block_size;
775 size_type sblock_id = block_id / k_sblock_rate;
776 size_type hblock_id = block_id / bit_vector_type::k_hblock_rate;
777
778 size_type trunk_base = m_v->m_hblock_header[2 * hblock_id];
779 size_type hblock_rank = m_v->m_hblock_header[2 * hblock_id + 1];
780
781 uint32_t local_i = i - block_id * bit_vector_type::k_block_size;
782
783 // Read superblock header.
784 uint8_t const * header_ptr8 =
785 ((uint8_t const *)(m_v->m_sblock_header.data())) + (sblock_id * bit_vector_type::k_sblock_header_size);
786 uint32_t * header_ptr32 = (uint32_t *)header_ptr8;
787 size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff);
788 size_type sblock_rank = *(header_ptr32 + 1);
789 header_ptr8 += 8;
790
791 uint16_t * header_ptr16 = (uint16_t *)header_ptr8;
792
793 // Uniform superblock optimization.
794 if ((*header_ptr32) & 0x80000000)
795 {
796 return rank_result<t_b>::adapt(hblock_rank + sblock_rank
797 + ((*(header_ptr8 + 1)) & 0x01)
798 * (i - sblock_id * bit_vector_type::k_sblock_size),
799 i);
800 }
801
802 // Fast forward through preceding blocks in the superblock.
803 size_type block_rank = 0;
804 for (size_type j = sblock_id * k_sblock_rate; j != block_id; ++j)
805 {
806 trunk_ptr += ((*header_ptr16) >> 10); // Update trunk pointer.
807 block_rank += ((*header_ptr16) & 0x1ff); // Add 1s in the block.
808 ++header_ptr16;
809 }
810
811 uint8_t const * trunk_p = ((uint8_t *)m_v->m_trunk.data()) + trunk_ptr;
812
813 uint32_t encoding_size = ((*header_ptr16) >> 10);
814 uint32_t ones = ((*header_ptr16) & 0x1ff);
815 uint32_t zeros = bit_vector_type::k_block_size - ones;
816
817 // Number of runs <= 2.
818 uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9);
819 if (!encoding_size)
820 {
821 uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros;
822 uint32_t local_rank = std::min(local_i, first_run_length);
823
825 hblock_rank + sblock_rank + block_rank
826 + (special_bit * local_rank + (1 - special_bit) * (local_i - local_rank)),
827 i);
828 }
829
830 // Number of runs > 2.
831 if (encoding_size < bit_vector_type::k_block_bytes)
832 {
833 if (std::min(ones, zeros) == encoding_size)
834 {
835 // Minority encoding.
836 uint32_t tot = 0;
837 while (tot < encoding_size && (*trunk_p++) < local_i)
838 ++tot;
839
840 return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank + special_bit * tot
841 + (1 - special_bit) * (local_i - tot),
842 i);
843 }
844
845 // Runs encoding.
846 if (special_bit)
847 {
848 uint32_t j = 0;
849 uint32_t acc = 0;
850
851 int32_t last = -1;
852 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
853 {
854 acc += *trunk_p - last;
855 ++trunk_p;
856 last = *trunk_p;
857 ++trunk_p;
858 j += 2;
859 }
860
861 if (j + 1 >= encoding_size)
862 {
863 if (j < encoding_size)
864 {
865 if (*trunk_p >= local_i)
866 acc += local_i - last - 1;
867 else
868 {
869 acc += (*trunk_p) - last;
870 acc += (ones - acc) - std::min(ones - acc, bit_vector_type::k_block_size - local_i);
871 }
872 }
873 else
874 acc += std::min(ones - acc, local_i - last - 1);
875 }
876 else
877 acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last;
878
879 return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank + acc, i);
880 }
881 else
882 {
883 uint32_t j = 0;
884 uint32_t acc = 0;
885 int32_t last = -1;
886
887 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
888 {
889 acc += *trunk_p - last;
890 ++trunk_p;
891 last = *trunk_p;
892 ++trunk_p;
893 j += 2;
894 }
895
896 if (j + 1 >= encoding_size)
897 {
898 if (j < encoding_size)
899 {
900 if (*trunk_p >= local_i)
901 acc += local_i - last - 1;
902 else
903 {
904 acc += (*trunk_p) - last;
905 acc += (zeros - acc) - std::min(zeros - acc, bit_vector_type::k_block_size - local_i);
906 }
907 }
908 else
909 acc += std::min(zeros - acc, local_i - last - 1);
910 }
911 else
912 acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last;
913
914 return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank + (local_i - acc), i);
915 }
916 }
917 else
918 {
919 // plain encoding.
920 uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_v->m_trunk.data()) + trunk_ptr);
921 uint32_t bit;
922 for (bit = 0; bit + 64 <= local_i; bit += 64)
923 block_rank += bits::cnt(*trunk_ptr64++);
924 if (bit != local_i)
925 block_rank += bits::cnt((*trunk_ptr64) & (((uint64_t)1 << (local_i - bit)) - 1));
926
927 return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank, i);
928 }
929 }
930
933 {
934 return rank(i);
935 }
936
938 const size_type size() const
939 {
940 return m_v->size();
941 }
942
944 void set_vector(bit_vector_type const * v = nullptr)
945 {
946 m_v = v;
947 }
948
951 {
952 if (this != &rs)
953 {
954 set_vector(rs.m_v);
955 }
956 return *this;
957 }
958
960 void load(std::istream &, bit_vector_type const * v = nullptr)
961 {
962 set_vector(v);
963 }
964
966 size_type serialize(std::ostream &, structure_tree_node * v = nullptr, std::string name = "") const
967 {
968 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
969 structure_tree::add_size(child, 0);
970 return 0;
971 }
972
973 template <typename archive_t>
974 void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
975 {}
976
977 template <typename archive_t>
979 {}
980
981 bool operator==(rank_support_hyb const & other) const noexcept
982 {
983 return *m_v == *other.m_v;
984 }
985
986 bool operator!=(rank_support_hyb const & other) const noexcept
987 {
988 return !(*this == other);
989 }
990};
991
993
998template <uint8_t t_b, uint32_t k_sblock_rate>
1000{
1001public:
1004 enum
1005 {
1006 bit_pat = t_b
1008 enum
1009 {
1010 bit_pat_len = (uint8_t)1
1012
1013private:
1014 bit_vector_type const * m_v;
1015
1016public:
1018 explicit select_support_hyb(bit_vector_type const * v = nullptr)
1019 {
1020 set_vector(v);
1021 }
1022
1025 {
1026 fprintf(stderr, "\nhyb_vector: select queries are not currently supported\n");
1027 std::exit(EXIT_FAILURE);
1028 }
1029
1032 {
1033 return select(i);
1034 }
1035
1037 const size_type size() const
1038 {
1039 return m_v->size();
1040 }
1041
1043 void set_vector(bit_vector_type const * v = nullptr)
1044 {
1045 m_v = v;
1046 }
1047
1050 {
1051 if (this != &rs)
1052 {
1053 set_vector(rs.m_v);
1054 }
1055 return *this;
1056 }
1057
1059 void load(std::istream &, bit_vector_type const * v = nullptr)
1060 {
1061 set_vector(v);
1062 }
1063
1065 size_type serialize(std::ostream &, structure_tree_node * v = nullptr, std::string name = "") const
1066 {
1067 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
1068 structure_tree::add_size(child, 0);
1069 return 0;
1070 }
1071
1072 template <typename archive_t>
1073 void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
1074 {}
1075
1076 template <typename archive_t>
1078 {}
1079
1080 bool operator==(select_support_hyb const & other) const noexcept
1081 {
1082 return *m_v == *other.m_v;
1083 }
1084
1085 bool operator!=(select_support_hyb const & other) const noexcept
1086 {
1087 return !(*this == other);
1088 }
1089};
1090
1091} // end namespace sdsl
1092
1093#endif // INCLUDED_SDSL_HYB_VECTOR
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
A hybrid-encoded compressed bitvector representation.
iterator end() const
bool operator==(hyb_vector const &v) const
hyb_vector()=default
Default constructor.
select_support_hyb< 0, k_sblock_rate > select_0_type
hyb_vector & operator=(hyb_vector &&hybrid)=default
uint64_t get_int(size_type idx, const uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx.
hyb_vector & operator=(hyb_vector const &hybrid)=default
void load(std::istream &in)
Loads the data structure from the given istream.
hyb_vector(hyb_vector &&hybrid)=default
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
iterator begin() const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
rank_support_hyb< 0, k_sblock_rate > rank_0_type
select_support_hyb< 1, k_sblock_rate > select_1_type
hyb_vector(hyb_vector const &hybrid)=default
hyb_vector(bit_vector const &bv)
Constructor.
random_access_const_iterator< hyb_vector > iterator
size_type size() const
Returns the size of the original bitvector.
bit_vector::difference_type difference_type
bit_vector::size_type size_type
bool operator!=(hyb_vector const &v) const
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
value_type operator[](size_type i) const
Accessing the i-th element of the original bitvector.
rank_support_hyb< 1, k_sblock_rate > rank_1_type
bit_vector::value_type value_type
A generic vector class for integers of width .
Definition io.hpp:36
int_vector_size_type size_type
int_vector_trait< t_width >::value_type value_type
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
uint64_t const * data() const noexcept
Pointer to the raw data of the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
Generic iterator for a random access container.
Definition iterators.hpp:24
Rank_support for the hyb_vector class.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
rank_support_hyb(bit_vector_type const *v=nullptr)
Standard constructor.
const size_type size() const
Return the size of the original vector.
void set_vector(bit_vector_type const *v=nullptr)
Set the supported vector.
bool operator!=(rank_support_hyb const &other) const noexcept
void load(std::istream &, bit_vector_type const *v=nullptr)
Load the data structure from a stream and set the supported vector.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
bool operator==(rank_support_hyb const &other) const noexcept
size_type serialize(std::ostream &, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into a stream.
bit_vector_type::size_type size_type
const size_type rank(size_type i) const
Answers rank queries.
rank_support_hyb & operator=(rank_support_hyb const &rs)
Assignment operator.
const size_type operator()(size_type i) const
Shorthand for rank(i)
hyb_vector< k_sblock_rate > bit_vector_type
Select support for the hyb_vector class.
select_support_hyb(bit_vector_type const *v=nullptr)
Standard constructor.
bit_vector_type::size_type size_type
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
select_support_hyb & operator=(select_support_hyb const &rs)
Assignment operator.
size_type serialize(std::ostream &, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into a stream.
void load(std::istream &, bit_vector_type const *v=nullptr)
Load the data structure from a stream and set the supported vector.
bool operator==(select_support_hyb const &other) const noexcept
size_type select(size_type) const
Answers select queries.
const size_type operator()(size_type i) const
Shorthand for select(i)
const size_type size() const
Return the size of the original vector.
void set_vector(bit_vector_type const *v=nullptr)
Set the supported vector.
hyb_vector< k_sblock_rate > bit_vector_type
bool operator!=(select_support_hyb const &other) const noexcept
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition io.hpp:94
void read_member(T &t, std::istream &in)
Definition io.hpp:119
static constexpr uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition bits.hpp:486
bit_vector::size_type size_type
static size_type adapt(size_type res, size_type i)
static size_type adapt(size_type res, size_type)
bit_vector::size_type size_type
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.