SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
coder_elias_gamma.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_CODER_ELIAS_GAMMA
9#define SDSL_CODER_ELIAS_GAMMA
10
11#include <assert.h>
12#include <stdint.h>
13
14#include <sdsl/bits.hpp>
15
16namespace sdsl
17{
18
19namespace coder
20{
21
23template <typename T = void>
24class elias_gamma
25{
26public:
27 typedef uint64_t size_type;
28
29 static struct impl
30 {
32
36 uint32_t prefixsum[1 << 16];
37
38 uint16_t prefixsum_8bit[(1 << 8) * 8];
39
40 impl()
41 {
42 // initialize prefixsum
43 for (uint64_t x = 0; x < (1 << 16); ++x)
44 {
45 uint64_t const * w = &x; // copy of x
46 uint64_t value = 0;
47 uint16_t numbers = 0, offset = 0, offset2 = 0;
48 while ((x >> offset) != 0)
49 {
50 uint64_t len_1 = bits::read_unary_bounded(w, offset);
51 if (len_1 == 0)
52 {
53 offset += 1;
54 value += 1;
55 ++numbers;
56 }
57 else
58 {
59 offset2 = offset + len_1 + 1;
60 if (offset2 + len_1 <= 16)
61 {
62 value += bits::read_int_bounded(w, offset2, len_1) + (1ULL << len_1);
63 offset = offset2 + len_1;
64 ++numbers;
65 }
66 else
67 break;
68 }
69 }
70 uint32_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
71 // number of decoded values,
72 // and the last 16 bit equals value of decoded prefix sum
73 result = (offset << 24) | (numbers << 16) | value;
74 if (value > 0)
75 assert(offset > 0 and numbers > 0 and offset <= 16 and numbers <= 16);
76 prefixsum[x] = result;
77 }
78 // initialize prefixsum_8bit
79
80 for (uint32_t maxi = 1, idx = 0; maxi <= 8; ++maxi)
81 {
82 for (uint64_t x = 0; x < (1 << 8); ++x)
83 {
84 uint64_t const * w = &x; // copy of x
85 uint64_t value = 0;
86 uint32_t numbers = 0, offset = 0, offset2 = 0;
87 while ((x >> offset) != 0 and numbers < maxi)
88 {
89 uint64_t len_1 = bits::read_unary_bounded(w, offset);
90 if (len_1 == 0)
91 {
92 offset += 1;
93 value += 1;
94 ++numbers;
95 }
96 else
97 {
98 offset2 = offset + len_1 + 1;
99 if (offset2 + len_1 <= 8)
100 {
101 value += bits::read_int_bounded(w, offset2, len_1) + (1ULL << len_1);
102 offset = offset2 + len_1;
103 ++numbers;
104 }
105 else
106 break;
107 }
108 }
109 uint16_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
110 // number of decoded values,
111 // and the last 16 bit equals value of decoded prefix sum
112 result = (offset << 12) | (numbers << 8) | value;
113 prefixsum_8bit[idx++] = result;
114 }
115 }
116 }
117 } data;
118
119 static const uint8_t min_codeword_length = 1; // 1 represents 1 and is the code word with minimum length
120 static uint8_t encoding_length(uint64_t);
122 /* \param data Bitstring
123 * \param start_idx Starting index of the decoding.
124 * \param n Number of values to decode from the bitstring.
125 * \param it Iterator to decode the values.
126 */
127 template <bool t_sumup, bool t_inc, class t_iter>
128 static uint64_t decode(uint64_t const * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);
129
132
137 static uint64_t decode_prefix_sum(uint64_t const * d, const size_type start_idx, size_type n);
138 static uint64_t
139 decode_prefix_sum(uint64_t const * d, const size_type start_idx, const size_type end_idx, size_type n);
140
141 template <class int_vector>
142 static bool encode(int_vector const & v, int_vector & z);
143 template <class int_vector>
144 static bool decode(int_vector const & z, int_vector & v);
145
147 /* \param x Positive integer to encode.
148 * \param z Raw data of vector to write the encoded form of x.
149 * \param start_idx Beginning bit index to write the encoded form ox x in z.
150 */
151 static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);
152
153 template <class int_vector>
154 static uint64_t * raw_data(int_vector & v)
155 {
156 return v.m_data;
157 }
158};
159
160// \sa coder::elias_gamma::encoding_length
161template <typename T>
162inline uint8_t elias_gamma<T>::encoding_length(uint64_t w)
163{
164 uint8_t len_1 = w ? bits::hi(w) : 64;
165 return 2 * len_1 + 1;
166}
167
168template <typename T>
169template <class int_vector>
171{
172 typedef typename int_vector::size_type size_type;
173 z.width(v.width());
174 size_type z_bit_size = 0;
175 uint64_t w;
176 const uint64_t zero_val = v.width() < 64 ? (1ULL) << v.width() : 0;
177 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
178 {
179 if ((w = *it) == 0)
180 {
181 w = zero_val;
182 }
183 z_bit_size += encoding_length(w);
184 }
185 z.bit_resize(z_bit_size); // Initial size of z
186 z.shrink_to_fit();
187 if (z_bit_size & 0x3F)
188 { // if z_bit_size % 64 != 0
189 *(z.m_data + (z_bit_size >> 6)) = 0; // initialize last word
190 }
191 z_bit_size = 0;
192 uint64_t * z_data = z.m_data;
193 uint8_t offset = 0;
194 size_type len_1; // TODO: change to uint8_t and test it
195 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
196 {
197 w = *it;
198 if (w == 0)
199 {
200 w = zero_val;
201 }
202 // (number of bits to represent w)-1
203 if (!w)
204 {
205 len_1 = 64;
206 bits::write_int_and_move(z_data, 0ULL, offset, 64);
207 bits::write_int_and_move(z_data, 1ULL, offset, 1);
208 }
209 else
210 {
211 len_1 = bits::hi(w);
212 bits::write_int_and_move(z_data, 1ULL << len_1, offset, len_1 + 1);
213 }
214 if (len_1)
215 {
216 bits::write_int_and_move(z_data, w, offset, len_1);
217 }
218 }
219 return true;
220}
221
222template <typename T>
223inline void elias_gamma<T>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
224{
225 uint8_t len_1 = 0;
226 if (!x)
227 {
228 len_1 = 64;
229 bits::write_int_and_move(z, 0, offset, 64);
230 bits::write_int_and_move(z, 1, offset, 1);
231 }
232 else
233 {
234 len_1 = bits::hi(x);
235 bits::write_int_and_move(z, 1ULL << len_1, offset, len_1 + 1);
236 }
237 if (len_1)
238 {
239 bits::write_int_and_move(z, x, offset, len_1);
240 }
241}
242
243template <typename T>
244template <class int_vector>
246{
247 typename int_vector::size_type len_1, n = 0;
248 uint64_t const * z_data = z.data();
249 uint64_t const * z_end = z.data() + (z.bit_size() >> 6);
250 uint8_t offset = 0;
251 while ((z_data < z_end) or (z_data == z_end and offset < (z.bit_size() & 0x3F)))
252 {
253 len_1 = bits::read_unary_and_move(z_data, offset);
254 if (len_1)
255 {
256 bits::move_right(z_data, offset, len_1);
257 }
258 ++n;
259 }
260 v.width(z.width());
261 v.resize(n);
262 v.shrink_to_fit();
263 return decode<false, true>(z.data(), 0, n, v.begin());
264}
265
266template <typename T>
267template <bool t_sumup, bool t_inc, class t_iter>
268inline uint64_t elias_gamma<T>::decode(uint64_t const * d, const size_type start_idx, size_type n, t_iter it)
269{
270 d += (start_idx >> 6);
271 uint64_t value = 0;
272 size_type i = 0;
273 size_type len_1;
274 uint8_t offset = start_idx & 0x3F;
275 while (i++ < n)
276 { // while not all values are decoded
277 if (!t_sumup)
278 value = 0;
279 len_1 = bits::read_unary_and_move(d, offset); // read length of x-1
280 if (!len_1)
281 {
282 value += 1;
283 }
284 else
285 {
286 value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << len_1);
287 }
288 if (t_inc)
289 *(it++) = value;
290 }
291 return value;
292}
293
294template <typename T>
295inline uint64_t
296elias_gamma<T>::decode_prefix_sum(uint64_t const * d, const size_type start_idx, const size_type end_idx, size_type n)
297{
298 if (n == 0)
299 return 0;
300 uint64_t const * lastdata = d + ((end_idx + 63) >> 6);
301 d += (start_idx >> 6);
302 uint64_t w = 0, value = 0;
303 int16_t buffered = 0, read = start_idx & 0x3F;
304 size_type i = 0;
305 if (n + read <= 64)
306 {
307 if (((*d >> read) & bits::lo_set[n]) == bits::lo_set[n])
308 return n;
309 }
310 else
311 { // n+read > 64
312 if ((*d >> read) == bits::lo_set[64 - read])
313 { // all bits are set to 1
314 value = 64 - read;
315 ++d;
316 n -= (64 - read);
317 read = 0;
318 while (n >= 64)
319 {
320 if (*d == 0xFFFFFFFFFFFFFFFFULL)
321 {
322 value += 64;
323 ++d;
324 n -= 64;
325 }
326 else
327 goto start_decoding;
328 }
329 // 0 <= n <= 64
330 if ((*d & bits::lo_set[n]) == bits::lo_set[n])
331 return value + n;
332 }
333 }
334
335start_decoding:
336 while (i < n)
337 {
338 while (buffered < 64 and d < lastdata)
339 {
340 fill_buffer:
341 w |= (((*d) >> read) << buffered);
342 if (read >= buffered)
343 {
344 ++d;
345 buffered += 64 - read;
346 read = 0;
347 }
348 else
349 { // read buffered
350 read += 64 - buffered;
351 buffered = 64;
352 }
353 }
354 uint32_t rbp = bits::lo(~w);
355 if (rbp > 0)
356 {
357 i += rbp;
358 value += rbp;
359 if (i >= n)
360 { // decoded too much
361 return value - (i - n); // return corrected value
362 }
363 assert((int64_t)buffered >= rbp);
364 buffered -= rbp;
365 w >>= rbp;
366 if (buffered < 16)
367 goto fill_buffer;
368 }
369 {
370 // i < n
371 begin_decode:
372 uint32_t psum = elias_gamma<T>::data.prefixsum[w & 0x0000FFFF];
373 if (!psum or i + ((psum >> 16) & 0x00FF) > n)
374 {
375 if (w == 0)
376 { // buffer is not full
377 w |= (((*d) >> read) << buffered);
378 if (read >= buffered)
379 {
380 ++d;
381 buffered += 64 - read;
382 read = 0;
383 }
384 else
385 {
386 read += 64 - buffered;
387 buffered = 64;
388 };
389 if (!w)
390 {
391 w |= (((*d) >> read) << buffered);
392 if (read >= buffered)
393 {
394 ++d;
395 buffered += 64 - read;
396 read = 0;
397 }
398 else
399 {
400 read += 64 - buffered;
401 buffered = 64;
402 };
403 }
404 }
405 // assert(w>0);
406 uint16_t len_1 = bits::lo(w); // read length of length
407 buffered -= (len_1 + 1);
408 w >>= (len_1 + 1);
409 if (len_1 > buffered)
410 { // buffer is not full
411 w |= (((*d) >> read) << buffered);
412 if (read >= buffered)
413 {
414 ++d;
415 buffered += 64 - read;
416 read = 0;
417 }
418 else
419 {
420 read += 64 - buffered;
421 buffered = 64;
422 };
423 if (len_1 > buffered)
424 {
425 w |= (((*d) >> read) << buffered);
426 if (read >= buffered)
427 {
428 ++d;
429 buffered += 64 - read;
430 read = 0;
431 }
432 else
433 {
434 read += 64 - buffered;
435 buffered = 64;
436 };
437 }
438 }
439 value += (w & bits::lo_set[len_1]) + (len_1 < 64) * (1ULL << len_1);
440 buffered -= len_1;
441 if (len_1 < 64)
442 {
443 w >>= len_1;
444 }
445 else
446 {
447 w = 0;
448 }
449 ++i;
450 if (i == n)
451 return value;
452 if (buffered >= 16)
453 goto begin_decode;
454 }
455 else
456 {
457 value += (psum & 0x0000FFFF);
458 i += ((psum >> 16) & 0x00FF);
459 if (i == n)
460 return value;
461 buffered -= (psum >> 24);
462 w >>= (psum >> 24);
463 if (buffered >= 16)
464 goto begin_decode;
465 }
466 }
467 };
468 return value;
469}
470
471template <typename T>
472inline uint64_t elias_gamma<T>::decode_prefix_sum(uint64_t const * d, const size_type start_idx, size_type n)
473{
474 if (n == 0)
475 return 0;
476 d += (start_idx >> 6);
477 uint64_t value = 0;
478 size_type i = 0;
479 uint8_t offset = start_idx & 0x3F;
480
481 if (n < 24)
482 {
483 if (n + offset <= 64)
484 {
485 if (((*d >> offset) & bits::lo_set[n]) == bits::lo_set[n])
486 return n;
487 }
488 else
489 { // n+offset > 64
490 if ((*d >> offset) == bits::lo_set[64 - offset])
491 { // all bits are set to 1
492 value = 64 - offset;
493 ++d;
494 n -= (64 - offset);
495 offset = 0;
496 while (n >= 64)
497 {
498 if (*d == 0xFFFFFFFFFFFFFFFFULL)
499 {
500 value += 64;
501 ++d;
502 n -= 64;
503 }
504 else
505 {
506 uint8_t temp = bits::lo(~(*d));
507 value += temp;
508 n -= temp;
509 offset = temp;
510 goto start_decoding;
511 }
512 }
513 // 0 <= n <= 64
514 if ((*d & bits::lo_set[n]) == bits::lo_set[n])
515 return value + n;
516 }
517 }
518 }
519
520start_decoding:
521 while (i < n)
522 { // while not all values are decoded
523 // n-i values to decode
524
525 if (((*d >> offset) & 0xF) == 0xF)
526 {
527 uint8_t maxdecode = n - i > 63 ? 63 : n - i;
528 uint8_t rbp = bits::lo(~bits::read_int(d, offset, maxdecode));
529 i += rbp;
530 value += rbp;
531 if (rbp + offset >= 64)
532 {
533 ++d;
534 offset = (rbp + offset) & 0x3F;
535 }
536 else
537 {
538 offset += rbp;
539 }
540 if (rbp == maxdecode)
541 continue;
542 }
543
544 while (i < n)
545 {
546 uint32_t psum = elias_gamma<T>::data.prefixsum[bits::read_int(d, offset, 16)];
547 if (psum == 0)
548 { // value does not fit in 16 bits
549 goto decode_single;
550 }
551 else if (i + ((psum >> 16) & 0x00FF) > n)
552 { // decoded too much
553 if (n - i <= 8)
554 {
555 psum = elias_gamma<T>::data.prefixsum_8bit[bits::read_int(d, offset, 8) | ((n - i - 1) << 8)];
556 if (psum > 0)
557 {
558 value += (psum & 0xFF);
559 i += ((psum >> 8) & 0xF);
560 offset += (psum >> 12);
561 if (offset >= 64)
562 {
563 offset &= 0x3F;
564 ++d;
565 }
566 }
567 }
568 break;
569 }
570 else
571 {
572 value += (psum & 0x0000FFFF);
573 i += ((psum >> 16) & 0x00FF);
574 offset += (psum >> 24);
575 if (offset >= 64)
576 {
577 offset &= 0x3F;
578 ++d;
579 }
580 }
581 }
582 if (i < n)
583 {
584 decode_single:
585 i++;
586 uint16_t len_1 = bits::read_unary_and_move(d, offset); // read length of length of x
587 value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << len_1);
588 }
589 }
590 return value;
591}
592
593template <typename T>
594typename elias_gamma<T>::impl elias_gamma<T>::data;
595
596} // end namespace coder
597
598} // end namespace sdsl
599#endif
bits.hpp contains the sdsl::bits class.
A class to encode and decode between Elias- and binary code.
static const uint8_t min_codeword_length
static uint64_t * raw_data(int_vector &v)
static bool encode(int_vector const &v, int_vector &z)
static uint64_t decode(uint64_t const *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
Decode n Elias-delta encoded bits beginning at start_idx in the bitstring "data".
static struct sdsl::coder::elias_gamma::impl data
static uint64_t decode_prefix_sum(uint64_t const *d, const size_type start_idx, size_type n)
Decode n Elias gamma encoded integers beginning at start_idx in the bitstring "data" and return the s...
static uint8_t encoding_length(uint64_t)
A generic vector class for integers of width .
Definition io.hpp:36
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
int_vector_size_type size_type
void shrink_to_fit()
Free unused allocated memory.
void resize(const size_type size)
Resize the int_vector in terms of elements.
uint64_t const * data() const noexcept
Pointer to the raw data of the int_vector.
size_type bit_size() const noexcept
The number of bits in the int_vector.
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Namespace for the succinct data structure library.
static constexpr uint64_t read_int_bounded(uint64_t const *word, uint8_t offset=0, const uint8_t len=64)
Definition bits.hpp:793
static constexpr void move_right(uint64_t const *&word, uint8_t &offset, const uint8_t len)
Move the bit-pointer (=uint64_t word and offset) len to the right.
Definition bits.hpp:893
static constexpr uint64_t read_unary_and_move(uint64_t const *&word, uint8_t &offset)
Reads an unary decoded value from a bit position in an array and moves the bit-pointer.
Definition bits.hpp:857
static constexpr uint32_t lo(uint64_t x)
Calculates the position of the rightmost 1-bit in the 64bit integer x if it exists.
Definition bits.hpp:689
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
static constexpr void write_int_and_move(uint64_t *&word, uint64_t x, uint8_t &offset, const uint8_t len)
Writes value x to an bit position in an array and moves the bit-pointer.
Definition bits.hpp:749
static constexpr uint64_t read_int(uint64_t const *word, uint8_t offset=0, const uint8_t len=64)
Reads a value from a bit position in an array.
Definition bits.hpp:777
static constexpr uint64_t read_unary_bounded(uint64_t const *word, uint8_t offset=0)
Definition bits.hpp:843
static constexpr uint64_t read_int_and_move(uint64_t const *&word, uint8_t &offset, const uint8_t len=64)
Reads a value from a bit position in an array and moved the bit-pointer.
Definition bits.hpp:799
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition bits.hpp:194