SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
int_vector_buffer.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_INT_VECTOR_BUFFER
9#define INCLUDED_INT_VECTOR_BUFFER
10
11#include <cassert>
12#include <fstream>
13#include <iterator>
14#include <stdint.h>
15#include <string>
16
17#include <sdsl/int_vector.hpp>
18#include <sdsl/ram_fs.hpp>
19#include <sdsl/sfstream.hpp>
20#include <sdsl/util.hpp>
21
22namespace sdsl
23{
24
25template <uint8_t t_width = 0>
27{
28public:
29 class iterator;
30
33
34private:
35 static_assert(t_width <= 64, "int_vector_buffer: width must be at most 64 bits.");
36 sdsl::isfstream m_ifile;
37 sdsl::osfstream m_ofile;
38 std::string m_filename;
39 int_vector<t_width> m_buffer;
40 bool m_need_to_write = false;
41 // length of int_vector header in bytes: 0 for plain, 8 for int_vector<t_width> (0 < t_width), 9 for int_vector<0>
42 uint64_t m_offset = 0;
43 uint64_t m_buffersize = 8; // in elements! m_buffersize*width() must be a multiple of 8!
44 uint64_t m_size = 0; // size of int_vector_buffer
45 uint64_t m_begin = 0; // number in elements
46
48 void read_block(const uint64_t idx)
49 {
50 m_begin = (idx / m_buffersize) * m_buffersize;
51 if (m_begin >= m_size)
52 {
53 util::set_to_value(m_buffer, 0);
54 }
55 else
56 {
57 m_ifile.seekg(m_offset + (m_begin * width()) / 8);
58 assert(m_ifile.good());
59 m_ifile.read((char *)m_buffer.data(), (m_buffersize * width()) / 8);
60 if ((uint64_t)m_ifile.gcount() < (m_buffersize * width()) / 8)
61 {
62 m_ifile.clear();
63 }
64 assert(m_ifile.good());
65 for (uint64_t i = m_size - m_begin; i < m_buffersize; ++i)
66 {
67 m_buffer[i] = 0;
68 }
69 }
70 }
71
73 void write_block()
74 {
75 if (m_need_to_write)
76 {
77 m_ofile.seekp(m_offset + (m_begin * width()) / 8);
78 assert(m_ofile.good());
79 if (m_begin + m_buffersize >= m_size)
80 {
81 // last block in file
82 uint64_t wb = ((m_size - m_begin) * width() + 7) / 8;
83 m_ofile.write((char *)m_buffer.data(), wb);
84 }
85 else
86 {
87 m_ofile.write((char *)m_buffer.data(), (m_buffersize * width()) / 8);
88 }
89 m_ofile.flush();
90 assert(m_ofile.good());
91 m_need_to_write = false;
92 }
93 }
94
96 uint64_t read(const uint64_t idx)
97 {
98 assert(is_open());
99 assert(idx < m_size);
100 if (idx < m_begin or m_begin + m_buffersize <= idx)
101 {
102 write_block();
103 read_block(idx);
104 }
105 return m_buffer[idx - m_begin];
106 }
107
109 void write(const uint64_t idx, const uint64_t value)
110 {
111 assert(is_open());
112 // If idx is not in current block, write current block and load needed block
113 if (idx < m_begin or m_begin + m_buffersize <= idx)
114 {
115 write_block();
116 read_block(idx);
117 }
118 if (m_size <= idx)
119 {
120 m_size = idx + 1;
121 }
122 m_need_to_write = true;
123 m_buffer[idx - m_begin] = value;
124 }
125
126public:
129 {
130 m_buffer = int_vector<t_width>();
131 }
132
134
143 int_vector_buffer(const std::string filename,
144 std::ios::openmode mode = std::ios::in,
145 const uint64_t buffer_size = 1024 * 1024,
146 const uint8_t int_width = t_width,
147 bool const is_plain = false)
148 {
149 m_filename = filename;
150 assert(!(mode & std::ios::app));
151 mode &= ~std::ios::app;
152 m_buffer.width(int_width);
153 if (is_plain)
154 {
155 m_offset = 0;
156 // is_plain is only allowed with width() in {8, 16, 32, 64}
157 assert(8 == width() or 16 == width() or 32 == width() or 64 == width());
158 }
159 else
160 {
161 m_offset = 8; // TODO: make this dependent on header size of int_vector<t_width>
162 }
163
164 // Open file for IO
165 m_ofile.open(m_filename, mode | std::ios::out | std::ios::binary);
166 assert(m_ofile.good());
167 m_ifile.open(m_filename, std::ios::in | std::ios::binary);
168 assert(m_ifile.good());
169 if (mode & std::ios::in)
170 {
171 uint64_t size = 0;
172 if (is_plain)
173 {
174 m_ifile.seekg(0, std::ios_base::end);
175 size = m_ifile.tellg() * 8;
176 }
177 else
178 {
179 uint8_t width = 0;
181 m_buffer.width(width);
182 }
183 assert(m_ifile.good());
184 m_size = size / width();
185 }
186 buffersize(buffer_size);
187 }
188
191 m_filename(std::move(ivb.m_filename)),
192 m_buffer(std::move(ivb.m_buffer)),
193 m_need_to_write(ivb.m_need_to_write),
194 m_offset(ivb.m_offset),
195 m_buffersize(ivb.m_buffersize),
196 m_size(ivb.m_size),
197 m_begin(ivb.m_begin)
198 {
199 ivb.m_ifile.close();
200 ivb.m_ofile.close();
201 m_ifile.open(m_filename, std::ios::in | std::ios::binary);
202 m_ofile.open(m_filename, std::ios::in | std::ios::out | std::ios::binary);
203 assert(m_ifile.good());
204 assert(m_ofile.good());
205 // set ivb to default-constructor state
206 ivb.m_filename = "";
207 ivb.m_buffer = int_vector<t_width>();
208 ivb.m_need_to_write = false;
209 ivb.m_offset = 0;
210 ivb.m_buffersize = 8;
211 ivb.m_size = 0;
212 ivb.m_begin = 0;
213 }
214
217 {
218 close();
219 }
220
223 {
224 close();
225 ivb.m_ifile.close();
226 ivb.m_ofile.close();
227 m_filename = ivb.m_filename;
228 m_ifile.open(m_filename, std::ios::in | std::ios::binary);
229 m_ofile.open(m_filename, std::ios::in | std::ios::out | std::ios::binary);
230 assert(m_ifile.good());
231 assert(m_ofile.good());
232 // assign the values of ivb to this
233 m_buffer = (int_vector<t_width> &&) ivb.m_buffer;
234 m_need_to_write = ivb.m_need_to_write;
235 m_offset = ivb.m_offset;
236 m_buffersize = ivb.m_buffersize;
237 m_size = ivb.m_size;
238 m_begin = ivb.m_begin;
239 // set ivb to default-constructor state
240 ivb.m_filename = "";
241 ivb.m_buffer = int_vector<t_width>();
242 ivb.m_need_to_write = false;
243 ivb.m_offset = 0;
244 ivb.m_buffersize = 8;
245 ivb.m_size = 0;
246 ivb.m_begin = 0;
247 return *this;
248 }
249
251 uint8_t width() const
252 {
253 return m_buffer.width();
254 }
255
257 uint64_t size() const
258 {
259 return m_size;
260 }
261
263 std::string filename() const
264 {
265 return m_filename;
266 }
267
269 uint64_t buffersize() const
270 {
271 assert(m_buffersize * width() % 8 == 0);
272 return (m_buffersize * width()) / 8;
273 }
274
276 void buffersize(uint64_t buffersize)
277 {
278 if (0ULL == buffersize)
279 buffersize = 8;
280 write_block();
281 if (0 == (buffersize * 8) % width())
282 {
283 m_buffersize =
284 buffersize * 8 / width(); // m_buffersize might not be multiple of 8, but m_buffersize*width() is.
285 }
286 else
287 {
288 uint64_t element_buffersize =
289 (buffersize * 8) / width() + 1; // one more element than fits into given buffersize in byte
290 m_buffersize = element_buffersize + 7 - (element_buffersize + 7) % 8; // take next multiple of 8
291 }
292 m_buffer = int_vector<t_width>(m_buffersize, 0, width());
293 if (0 != m_buffersize)
294 read_block(0);
295 }
296
298 bool good()
299 {
300 return m_ifile.good() and m_ofile.good();
301 }
302
304 bool is_open()
305 {
306 return m_ifile.is_open() and m_ofile.is_open();
307 ;
308 }
309
311 void reset()
312 {
313 // reset file
314 assert(m_ifile.good());
315 assert(m_ofile.good());
316 m_ifile.close();
317 m_ofile.close();
318 m_ofile.open(m_filename, std::ios::out | std::ios::binary);
319 assert(m_ofile.good());
320 m_ifile.open(m_filename, std::ios::in | std::ios::binary);
321 assert(m_ifile.good());
322 assert(m_ofile.good());
323 // reset member variables
324 m_need_to_write = false;
325 m_size = 0;
326 // reset buffer
327 read_block(0);
328 }
329
330 // Forward declaration
331 class reference;
332
334
337 reference operator[](uint64_t idx)
338 {
339 return reference(this, idx);
340 }
341
343 void push_back(const uint64_t value)
344 {
345 write(m_size, value);
346 }
347
349
352 void close(bool remove_file = false)
353 {
354 if (is_open())
355 {
356 if (!remove_file)
357 {
358 write_block();
359 if (0 < m_offset)
360 { // in case of int_vector, write header and trailing zeros
361 uint64_t size = m_size * width();
362 m_ofile.seekp(0, std::ios::beg);
364 assert(m_ofile.good());
365 uint64_t wb = (size + 7) / 8;
366 if (wb % 8)
367 {
368 m_ofile.seekp(m_offset + wb);
369 assert(m_ofile.good());
370 m_ofile.write("\0\0\0\0\0\0\0\0", 8 - wb % 8);
371 assert(m_ofile.good());
372 }
373 }
374 }
375 m_ifile.close();
376 assert(m_ifile.good());
377 m_ofile.close();
378 assert(m_ofile.good());
379 if (remove_file)
380 {
381 sdsl::remove(m_filename);
382 }
383 }
384 }
385
387 {
388 return iterator(*this, 0);
389 }
390
392 {
393 return iterator(*this, size());
394 }
395
397 {
398 friend class int_vector_buffer<t_width>;
399
400 private:
401 int_vector_buffer<t_width> * const m_int_vector_buffer = nullptr;
402 uint64_t m_idx = 0;
403
404 reference()
405 {}
406
407 reference(int_vector_buffer<t_width> * _int_vector_buffer, uint64_t _idx) :
408 m_int_vector_buffer(_int_vector_buffer),
409 m_idx(_idx)
410 {}
411
412 public:
414 operator uint64_t() const
415 {
416 return m_int_vector_buffer->read(m_idx);
417 }
418
420 reference & operator=(uint64_t const & val)
421 {
422 m_int_vector_buffer->write(m_idx, val);
423 return *this;
424 }
425
428 {
429 return *this = (uint64_t)(x);
430 };
431
432 reference(reference const &) = default;
433
436 {
437 uint64_t x = m_int_vector_buffer->read(m_idx);
438 m_int_vector_buffer->write(m_idx, x + 1);
439 return *this;
440 }
441
443 uint64_t operator++(int)
444 {
445 uint64_t val = (uint64_t) * this;
446 ++(*this);
447 return val;
448 }
449
452 {
453 uint64_t x = m_int_vector_buffer->read(m_idx);
454 m_int_vector_buffer->write(m_idx, x - 1);
455 return *this;
456 }
457
459 uint64_t operator--(int)
460 {
461 uint64_t val = (uint64_t) * this;
462 --(*this);
463 return val;
464 }
465
467 reference & operator+=(const uint64_t x)
468 {
469 uint64_t w = m_int_vector_buffer->read(m_idx);
470 m_int_vector_buffer->write(m_idx, w + x);
471 return *this;
472 }
473
475 reference & operator-=(const uint64_t x)
476 {
477 uint64_t w = m_int_vector_buffer->read(m_idx);
478 m_int_vector_buffer->write(m_idx, w - x);
479 return *this;
480 }
481
482 bool operator==(reference const & x) const
483 {
484 return (uint64_t) * this == (uint64_t)x;
485 }
486
487 bool operator<(reference const & x) const
488 {
489 return (uint64_t) * this < (uint64_t)x;
490 }
491 };
492
494 {
495 private:
497 uint64_t m_idx = 0;
498
499 public:
500 using iterator_category = std::random_access_iterator_tag;
505
506 iterator() = delete;
507 iterator(int_vector_buffer<t_width> & ivb, uint64_t idx = 0) : m_ivb(ivb), m_idx(idx)
508 {}
509
511 {
512 ++m_idx;
513 return *this;
514 }
515
517 {
518 iterator it = *this;
519 ++(*this);
520 return it;
521 }
522
524 {
525 --m_idx;
526 return *this;
527 }
528
530 {
531 iterator it = *this;
532 --(*this);
533 return it;
534 }
535
537 {
538 return m_ivb[m_idx];
539 }
540
542 {
543 if (i < 0)
544 return *this -= (-i);
545 m_idx += i;
546 return *this;
547 }
548
550 {
551 if (i < 0)
552 return *this += (-i);
553 m_idx -= i;
554 return *this;
555 }
556
558 {
559 iterator it = *this;
560 return it += i;
561 }
562
564 {
565 iterator it = *this;
566 return it -= i;
567 }
568
569 bool operator==(iterator const & it) const
570 {
571 return &m_ivb == &(it.m_ivb) and m_idx == it.m_idx;
572 }
573
574 bool operator!=(iterator const & it) const
575 {
576 return !(*this == it);
577 }
579 {
580 return (m_idx - it.m_idx);
581 }
582 };
583};
584
585} // namespace sdsl
586
587#endif // include guard
iterator operator-(difference_type i) const
sdsl::int_vector_buffer< t_width >::difference_type difference_type
sdsl::int_vector_buffer< t_width >::value_type value_type
difference_type operator-(iterator const &it)
iterator(int_vector_buffer< t_width > &ivb, uint64_t idx=0)
bool operator!=(iterator const &it) const
std::random_access_iterator_tag iterator_category
bool operator==(iterator const &it) const
iterator & operator-=(difference_type i)
iterator operator+(difference_type i) const
iterator & operator+=(difference_type i)
uint64_t operator++(int)
Postfix increment of the proxy object.
bool operator<(reference const &x) const
reference & operator-=(const uint64_t x)
Subtract assign from the proxy object.
reference & operator--()
Prefix decrement of the proxy object.
reference(reference const &)=default
reference & operator+=(const uint64_t x)
Add assign from the proxy object.
reference & operator=(reference const &x)
Assignment operator.
uint64_t operator--(int)
Postfix decrement of the proxy object.
bool operator==(reference const &x) const
reference & operator++()
Prefix increment of the proxy object.
reference & operator=(uint64_t const &val)
Assignment operator for write operations.
uint64_t size() const
Returns the number of elements currently stored.
void reset()
Delete all content and set size to 0.
int_vector_buffer(const std::string filename, std::ios::openmode mode=std::ios::in, const uint64_t buffer_size=1024 *1024, const uint8_t int_width=t_width, bool const is_plain=false)
Constructor for int_vector_buffer.
void close(bool remove_file=false)
Close the int_vector_buffer.
reference operator[](uint64_t idx)
[] operator
int_vector_buffer< t_width > & operator=(int_vector_buffer &&ivb)
Move assignment operator.
int_vector< t_width >::difference_type difference_type
int_vector< t_width >::value_type value_type
bool good()
Returns whether state of underlying streams are good.
void buffersize(uint64_t buffersize)
Set the buffersize in bytes.
std::string filename() const
Returns the filename.
uint64_t buffersize() const
Returns the buffersize in bytes.
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
uint8_t width() const
Returns the width of the integers which are accessed via the [] operator.
int_vector_buffer(int_vector_buffer &&ivb)
Move constructor.
bool is_open()
Returns whether underlying streams are currently associated to a file.
A generic vector class for integers of width .
Definition io.hpp:36
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
ptrdiff_t difference_type
int_vector_trait< t_width >::value_type value_type
static size_t read_header(int_vector_size_type &size, int_width_type &int_width, std::istream &in)
Read the size and int_width of a int_vector.
uint64_t const * data() const noexcept
Pointer to the raw data of the int_vector.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
isfstream & seekg(pos_type pos)
Definition sfstream.hpp:298
bool is_open()
Is the stream close?
Definition sfstream.hpp:249
std::streampos tellg()
Definition sfstream.hpp:362
buf_ptr_type open(std::string const &file, std::ios_base::openmode mode=std::ios_base::in)
Open the stream.
Definition sfstream.hpp:218
void close()
Close the stream.
Definition sfstream.hpp:264
osfstream & seekp(pos_type pos)
Definition sfstream.hpp:125
void close()
Close the stream.
Definition sfstream.hpp:91
bool is_open()
Is the stream close?
Definition sfstream.hpp:76
buf_ptr_type open(std::string const &file, std::ios_base::openmode mode=std::ios_base::out)
Open the stream.
Definition sfstream.hpp:45
int_vector.hpp contains the sdsl::int_vector class.
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Definition util.hpp:597
Namespace for the succinct data structure library.
int remove(std::string const &)
Remove a file.
Definition ram_fs.hpp:221
ram_fs.hpp
sfstream.hpp contains a two stream class which can be used to read/write from/to files or strings.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.