SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
coder_comma.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_CODER_COMMA_INCLUDED
9#define SDSL_CODER_COMMA_INCLUDED
10
11#include <algorithm>
12#include <array>
13#include <stddef.h>
14#include <stdint.h>
15
16#include <sdsl/bits.hpp>
17#include <sdsl/config.hpp>
18
19namespace sdsl
20{
21
22namespace coder
23{
24
26
47template <uint8_t t_width = 2>
48class comma
49{
50private:
51 static_assert(t_width > 1 && t_width <= 32, "comma coder: Width must be in interval [2,32]");
52
53 static constexpr size_t base_fits_in_64(uint32_t base, uint64_t product, size_t res)
54 {
55 return product == 0 ? res : base_fits_in_64(base, product / base, res + 1);
56 }
57
58 // base in which numbers are coded
59 static const uint32_t base = (1 << t_width) - 1;
60
61 // table needed for computation of encoding lengths.
62 // table contains entries of the kind (index, base^index)
63 // to know how much digits a number needs to be encoded.
64 static constexpr size_t codelentbllen = base_fits_in_64(base, 0xFFFFFFFFFFFFFFFFULL, 0);
65
66 static struct impl
67 {
68 std::array<uint64_t, codelentbllen> codelentbl;
69 impl()
70 {
71 // intialize codelentbl
72 uint64_t n = 1;
73 for (size_t i = 0; i < codelentbllen; i++)
74 {
75 codelentbl[i] = n;
76 n = (n << t_width) - n; // n = n * base
77 }
78 }
79 } data;
80
81 // helper function to encode a single number without
82 // termination digit
83 static void encode_in_base(uint64_t x, uint64_t *& z, uint8_t & offset);
84
85public:
86 typedef uint64_t size_type;
87 static const uint8_t min_codeword_length = t_width; // 0 needs t_width bits as termination
88
90
92 // the value w in comma code.
95 static uint8_t encoding_length(uint64_t w);
96
98 // at bit position start_idx.
99 /* \param x Positive integer to encode.
100 * \param z Raw data of vector to write the encoded form of x.
101 * \param start_idx Beginning bit index to write the encoded form ox x in z.
102 */
103 static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);
104
106 /* \param v vector containing positive integer values
107 * \param z vector to put the encoded values
108 */
109 template <class int_vector>
110 static bool encode(int_vector const & v, int_vector & z);
111
113
115 // in the bitstring "data"
116 /* \param data Bitstring
117 * \param start_idx Starting index of the decoding.
118 * \param n Number of values to decode from the bitstring.
119 * \param it Iterator to store the values.
120 */
121 template <bool t_sumup, bool t_inc, class t_iter>
122 static uint64_t decode(uint64_t const * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);
123
125 // beginning at start_idx in the bitstring "data"
126 // and return the sum of these values.
135 static uint64_t decode_prefix_sum(uint64_t const * data, const size_type start_idx, size_type n);
136
138 // beginning at start_idx ending at end_idx (exclusive)
139 // in the bitstring "data"
140 // and return the sum of these values.
151 static uint64_t
152 decode_prefix_sum(uint64_t const * data, const size_type start_idx, const size_type end_idx, size_type n);
153
155 // and store them in vector v.
159 template <class int_vector>
160 static bool decode(int_vector const & z, int_vector & v);
161
162 // interface needs this function for whatever :>
163 template <class int_vector>
164 static uint64_t * raw_data(int_vector & v)
165 {
166 return v.m_data;
167 }
168};
169
171
173
175
176template <uint8_t t_width>
177typename comma<t_width>::impl comma<t_width>::data;
178
179template <uint8_t t_width>
180inline uint8_t comma<t_width>::encoding_length(uint64_t w)
181{
182 // use function table and binary search to determine the number of digits
183 // needed to encode w in given base.
184 uint8_t numdigits = std::upper_bound(data.codelentbl.begin(), data.codelentbl.end(), w) - data.codelentbl.begin();
185 // finally calculate length.
186 // Don't forget termination character on calculations ;)
187 return (numdigits + 1) * t_width;
188}
189
190template <uint8_t t_width>
191void comma<t_width>::encode_in_base(uint64_t x, uint64_t *& z, uint8_t & offset)
192{
193 if (x)
194 {
195 uint32_t digit = x % base; // get next digit
196 // encode digits with higher order
197 encode_in_base(x / base, z, offset);
198 // and write own digit
199 bits::write_int_and_move(z, digit, offset, t_width);
200 }
201}
202
203template <uint8_t t_width>
204inline void comma<t_width>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
205{
206 // encode x itself
207 encode_in_base(x, z, offset);
208 // and append the termination digit
209 bits::write_int_and_move(z, base, offset, t_width);
210}
211
212template <uint8_t t_width>
213template <class int_vector>
215{
216 // first, find out how much bits vector z needs to save values
217 typedef typename int_vector::size_type size_type;
218 size_type z_bit_size = 0;
219 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
220 {
221 z_bit_size += encoding_length(*it);
222 }
223
224 // trim vector z to correct size
225 z.width(v.width());
226 z.bit_resize(z_bit_size); // for future may check if resizing works
227 z.shrink_to_fit();
228
229 // iterate again and save values in z
230 uint64_t * z_data = z.m_data;
231 uint8_t offset = 0;
232 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
233 {
234 encode(*it, z_data, offset);
235 }
236 return true;
237}
238
240
241template <uint8_t t_width>
242template <bool t_sumup, bool t_inc, class t_iter>
243inline uint64_t comma<t_width>::decode(uint64_t const * data, const size_type start_idx, size_type n, t_iter it)
244{
245 data += (start_idx >> 6); // jump to byte offset
246 uint8_t offset = start_idx & 0x3F; // and calculate bit offset
247 uint64_t value = 0;
248 for (size_type i = 0; i < n; i++)
249 {
250 // read next value
251 uint64_t v = 0;
252 for (uint32_t digit = (uint32_t)bits::read_int_and_move(data, offset, t_width); // read first digit
253 digit != base; // while digit is not the terminating digit
254 v = (v << t_width) - v + digit, // v = v * base + digit
255 digit = (uint32_t)bits::read_int_and_move(data, offset, t_width))
256 ; // and read next digit
257 // now decide how to handle value
258 value = (t_sumup) ? value + v : v;
259 if (t_inc)
260 *(it++) = value;
261 }
262 return value;
263}
264
265template <uint8_t t_width>
266uint64_t comma<t_width>::decode_prefix_sum(uint64_t const * data, const size_type start_idx, size_type n)
267{
268 // easiest seems to be to use already build function decode...
269 return decode<true, false, int *>(data, start_idx, n);
270 // Note for above: 3rd template parameter ca be any pntr except void *
271}
272
273template <uint8_t t_width>
274uint64_t comma<t_width>::decode_prefix_sum(uint64_t const * data,
275 const size_type start_idx,
276 SDSL_UNUSED const size_type end_idx,
277 size_type n)
278{
279 // end index does not change anything here...
280 return decode_prefix_sum(data, start_idx, n);
281}
282
283template <uint8_t t_width>
284template <class int_vector>
286{
287 // check if bit size is dividable through t_width.
288 if (z.bit_size() % t_width != 0)
289 return false;
290
291 // calculate num of overall digits in z (including terminating digit)
292 uint64_t numOfDigits = z.bit_size() / t_width;
293 // iteration vars for z vector
294 uint64_t const * z_data = z.data();
295 uint8_t z_offset = 0;
296 // utility to count number of entries in z, and last read digit
297 uint32_t digit = base;
298 typename int_vector::size_type n = 0;
299
300 // iterate over all digits. each time a termination digit is
301 // detected, a encoded number in vector ends.
302 while (numOfDigits--)
303 {
304 digit = (uint32_t)bits::read_int_and_move(z_data, z_offset, t_width);
305 if (digit == base)
306 n++; // termination digit detected
307 }
308
309 // also, ensure last read digit was a termination digit
310 if (digit != base)
311 return false;
312
313 // resize vector v
314 v.width(z.width());
315 v.resize(n);
316 v.shrink_to_fit();
317
318 // and finally decode and save result in v
319 decode<false, true>(z.data(), 0, n, v.begin());
320 return true;
321}
322
323} // end of namespace coder
324} // end of namespace sdsl
325
326#endif
bits.hpp contains the sdsl::bits class.
A class to encode and decode between comma code and binary code.
static uint64_t * raw_data(int_vector &v)
static uint8_t encoding_length(uint64_t w)
Get the number of bits that are necessary to encode.
static uint64_t decode_prefix_sum(uint64_t const *data, const size_type start_idx, size_type n)
Decode n comma gamma encoded integers.
static void encode(uint64_t x, uint64_t *&z, uint8_t &offset)
Encode one positive integer x to an int_vector.
static uint64_t decode_prefix_sum(uint64_t const *data, const size_type start_idx, const size_type end_idx, size_type n)
Decode n comma gamma encoded integers.
static uint64_t decode(uint64_t const *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
Decode n comma encoded values beginning at start_idx.
static const uint8_t min_codeword_length
A generic vector class for integers of width .
Definition io.hpp:36
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
int_vector_size_type size_type
void shrink_to_fit()
Free unused allocated memory.
void resize(const size_type size)
Resize the int_vector in terms of elements.
uint64_t const * data() const noexcept
Pointer to the raw data of the int_vector.
size_type bit_size() const noexcept
The number of bits in the int_vector.
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
#define SDSL_UNUSED
Definition config.hpp:12
Namespace for the succinct data structure library.
static constexpr void write_int_and_move(uint64_t *&word, uint64_t x, uint8_t &offset, const uint8_t len)
Writes value x to an bit position in an array and moves the bit-pointer.
Definition bits.hpp:749
static constexpr uint64_t read_int_and_move(uint64_t const *&word, uint8_t &offset, const uint8_t len=64)
Reads a value from a bit position in an array and moved the bit-pointer.
Definition bits.hpp:799