SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
int_vector_mapper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4#ifndef SDSL_INT_VECTOR_MAPPER
5#define SDSL_INT_VECTOR_MAPPER
6
7#include <cinttypes>
8#include <cstdio>
9#include <iostream>
10#include <stdexcept>
11#include <stdlib.h>
12#include <string.h>
13#include <string>
14#include <unistd.h>
15
16#include <sdsl/bits.hpp>
17#include <sdsl/config.hpp>
18#include <sdsl/int_vector.hpp>
19#include <sdsl/io.hpp>
21#include <sdsl/ram_fs.hpp>
22#include <sdsl/sfstream.hpp>
23#include <sdsl/util.hpp>
24
25namespace sdsl
26{
27
28template <uint8_t t_width = 0, std::ios_base::openmode t_mode = std::ios_base::out | std::ios_base::in>
30{
31 static_assert(t_width <= 64, "int_vector_mapper: width must be at most 64 bits.");
32
33public:
38 static constexpr uint8_t fixed_int_width = t_width;
39
40public:
42
43private:
44 uint8_t * m_mapped_data = nullptr;
45 uint64_t m_file_size_bytes = 0;
46 off_t m_data_offset = 0;
47 int m_fd = -1;
48 int_vector<t_width> m_wrapper;
49 std::string m_file_name;
50 bool m_delete_on_close;
51
52public:
56
57public:
59 {
60 if (m_mapped_data)
61 {
62 auto ret = memory_manager::mem_unmap(m_fd, m_mapped_data, m_file_size_bytes);
63 if (ret != 0)
64 {
65 std::cerr << "int_vector_mapper: error unmapping file mapping'" << m_file_name << "': " << ret
66 << std::endl;
67 }
68
69 if (t_mode & std::ios_base::out)
70 { // write was possible
71 if (m_data_offset)
72 { // if the file is not a plain file
73 // set std::ios::in to not truncate the file
74 osfstream out(m_file_name, std::ios::in);
75 if (out)
76 {
77 out.seekp(0, std::ios::beg);
78 int_vector<t_width>::write_header(m_wrapper.m_size, m_wrapper.m_width, out);
79
80 // out.seekp(0, std::ios::end);
81 }
82 else
83 {
84 std::cerr << "int_vector_mapper: could not open file for header update" << std::endl;
85 /*
86 * throw std::runtime_error("int_vector_mapper: \
87 * could not open file for header update");
88 */
89 }
90 }
91 }
92
93 if (t_mode & std::ios_base::out)
94 {
95 // do we have to truncate?
96 size_type current_bit_size = m_wrapper.m_size;
97 size_type data_size_in_bytes = ((current_bit_size + 63) >> 6) << 3;
98 if (m_file_size_bytes != data_size_in_bytes + m_data_offset)
99 {
100 int tret = memory_manager::truncate_file_mmap(m_fd, data_size_in_bytes + m_data_offset);
101 if (tret == -1)
102 {
103 std::string truncate_error =
104 std::string("int_vector_mapper: truncate error. ") + std::string(util::str_from_errno());
105 std::cerr << truncate_error;
106 }
107 }
108 }
109 }
110 if (m_fd != -1)
111 {
113 if (ret != 0)
114 {
115 std::cerr << "int_vector_mapper: error closing file mapping'" << m_file_name << "': " << ret
116 << std::endl;
117 }
118 if (m_delete_on_close)
119 {
120 int ret_code = sdsl::remove(m_file_name);
121 if (ret_code != 0)
122 {
123 std::cerr << "int_vector_mapper: error deleting file '" << m_file_name << "': " << ret_code
124 << std::endl;
125 }
126 }
127 }
128 m_wrapper.m_data = nullptr;
129 m_wrapper.m_size = 0;
130 }
131
133 {
134 m_wrapper.m_data = ivm.m_wrapper.m_data;
135 m_wrapper.m_size = ivm.m_wrapper.m_size;
136 m_wrapper.width(ivm.m_wrapper.width());
137 m_file_name = ivm.m_file_name;
138 m_delete_on_close = ivm.m_delete_on_close;
139 ivm.m_wrapper.m_data = nullptr;
140 ivm.m_wrapper.m_size = 0;
141 ivm.m_mapped_data = nullptr;
142 ivm.m_fd = -1;
143 }
144
146 {
147 m_wrapper.m_data = ivm.m_wrapper.m_data;
148 m_wrapper.m_size = ivm.m_wrapper.m_size;
149 m_wrapper.width(ivm.m_wrapper.width());
150 m_file_name = ivm.m_file_name;
151 m_delete_on_close = ivm.m_delete_on_close;
152 ivm.m_wrapper.m_data = nullptr;
153 ivm.m_wrapper.m_size = 0;
154 ivm.m_mapped_data = nullptr;
155 ivm.m_fd = -1;
156 return (*this);
157 }
158
159 int_vector_mapper(std::string const & key, cache_config const & config) :
161 {}
162
163 int_vector_mapper(const std::string filename, bool is_plain = false, bool delete_on_close = false) :
164 m_data_offset(0),
165 m_file_name(filename),
166 m_delete_on_close(delete_on_close)
167 {
168 size_type size_in_bits = 0;
169 uint8_t int_width = t_width;
170 {
171 isfstream f(filename, std::ifstream::binary);
172 if (!f.is_open())
173 {
174 throw std::runtime_error("int_vector_mapper: file " + m_file_name + " does not exist.");
175 }
176 if (!is_plain)
177 {
178 m_data_offset = int_vector<t_width>::read_header(size_in_bits, int_width, f);
179 }
180 }
181
182 m_file_size_bytes = util::file_size(m_file_name);
183
184 if (is_plain)
185 {
186 if (8 != t_width and 16 != t_width and 32 != t_width and 64 != t_width)
187 {
188 throw std::runtime_error("int_vector_mapper: plain vector can "
189 "only be of width 8, 16, 32, 64.");
190 }
191 else
192 {
193 uint8_t byte_width = t_width / 8;
194 // if( m_file_size_bytes % (t_width/8) != 0)
195 if ((m_file_size_bytes & bits::lo_set[bits::cnt(byte_width - 1)]) != 0)
196 {
197 throw std::runtime_error("int_vector_mapper: plain vector not a multiple of byte: "
198 + std::to_string(m_file_size_bytes) + " mod " + std::to_string(byte_width)
199 + " != 0");
200 }
201 }
202 size_in_bits = m_file_size_bytes * 8;
203 }
204
205 // open backend file depending on mode
206 m_fd = memory_manager::open_file_for_mmap(m_file_name, t_mode);
207 if (m_fd == -1)
208 {
209 std::string open_error =
210 std::string("int_vector_mapper: open file error.") + std::string(util::str_from_errno());
211 throw std::runtime_error(open_error);
212 }
213
214 // prepare for mmap
215 m_wrapper.width(int_width);
216 // mmap data
217 m_mapped_data = (uint8_t *)memory_manager::mmap_file(m_fd, m_file_size_bytes, t_mode);
218 if (m_mapped_data == nullptr)
219 {
220 std::string mmap_error =
221 std::string("int_vector_mapper: mmap error. ") + std::string(util::str_from_errno());
222 throw std::runtime_error(mmap_error);
223 }
224
225 m_wrapper.m_size = size_in_bits;
226 free(m_wrapper.m_data);
227 m_wrapper.m_data = (uint64_t *)(m_mapped_data + m_data_offset);
228 }
229
230 std::string file_name() const
231 {
232 return m_file_name;
233 }
235 {
236 return m_wrapper.width();
237 }
238 void width(const uint8_t new_int_width)
239 {
240 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'width'");
241 m_wrapper.width(new_int_width);
242 }
244 {
245 return m_wrapper.size();
246 }
248 {
249 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'bit_resize'");
250 size_type new_size_in_bytes = ((bit_size + 63) >> 6) << 3;
251 if (m_file_size_bytes != new_size_in_bytes + m_data_offset)
252 {
253 if (m_mapped_data)
254 {
255 auto ret = memory_manager::mem_unmap(m_fd, m_mapped_data, m_file_size_bytes);
256 if (ret != 0)
257 {
258 std::cerr << "int_vector_mapper: error unmapping file mapping'" << m_file_name << "': " << ret
259 << std::endl;
260 }
261 }
262 int tret = memory_manager::truncate_file_mmap(m_fd, new_size_in_bytes + m_data_offset);
263 if (tret == -1)
264 {
265 std::string truncate_error =
266 std::string("int_vector_mapper: truncate error. ") + std::string(util::str_from_errno());
267 throw std::runtime_error(truncate_error);
268 }
269 m_file_size_bytes = new_size_in_bytes + m_data_offset;
270
271 // perform the actual mapping
272 m_mapped_data = (uint8_t *)memory_manager::mmap_file(m_fd, m_file_size_bytes, t_mode);
273 if (m_mapped_data == nullptr)
274 {
275 std::string mmap_error =
276 std::string("int_vector_mapper: mmap error. ") + std::string(util::str_from_errno());
277 throw std::runtime_error(mmap_error);
278 }
279
280 // update wrapper
281 m_wrapper.m_data = (uint64_t *)(m_mapped_data + m_data_offset);
282 }
283 m_wrapper.m_size = bit_size;
284 }
285
287 {
288 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'resize'");
289 size_type size_in_bits = size * width();
290 bit_resize(size_in_bits);
291 }
292
294 {
295 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'begin'");
296 return m_wrapper.begin();
297 }
299 {
300 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'end'");
301 return m_wrapper.end();
302 }
303 auto begin() const -> typename int_vector<t_width>::const_iterator
304 {
305 return m_wrapper.begin();
306 }
307 auto end() const -> typename int_vector<t_width>::const_iterator
308 {
309 return m_wrapper.end();
310 }
311 auto cbegin() const -> typename int_vector<t_width>::const_iterator
312 {
313 return m_wrapper.begin();
314 }
315 auto cend() const -> typename int_vector<t_width>::const_iterator
316 {
317 return m_wrapper.end();
318 }
320 {
321 return m_wrapper[idx];
322 }
324 {
325 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'operator[]'");
326 return m_wrapper[idx];
327 }
328 uint64_t const * data() const
329 {
330 return m_wrapper.data();
331 }
332 uint64_t * data()
333 {
334 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'data'");
335 return m_wrapper.data();
336 }
337 value_type get_int(size_type idx, const uint8_t len = 64) const
338 {
339 return m_wrapper.get_int(idx, len);
340 }
341 void set_int(size_type idx, value_type x, const uint8_t len = 64)
342 {
343 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'set_int'");
344 m_wrapper.set_int(idx, x, len);
345 }
347 {
348 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'push_back'");
349 if (capacity() < size() + 1)
350 {
351 size_type old_size = m_wrapper.m_size;
352 size_type size_in_bits = (size() + append_block_size) * width();
353 bit_resize(size_in_bits);
354 m_wrapper.m_size = old_size;
355 }
356 // update size in wrapper only
357 m_wrapper.m_size += width();
358 m_wrapper[size() - 1] = x;
359 }
361 {
362 size_t data_size_in_bits = 8 * (m_file_size_bytes - m_data_offset);
363 return data_size_in_bits / width();
364 }
366 {
367 return m_wrapper.bit_size();
368 }
369 template <class container>
370 bool operator==(container const & v) const
371 {
372 return std::equal(begin(), end(), v.begin());
373 }
374 bool operator==(int_vector<t_width> const & v) const
375 {
376 return m_wrapper == v;
377 }
378 bool operator==(int_vector_mapper const & v) const
379 {
380 return m_wrapper == v.m_wrapper;
381 }
382 template <class container>
383 bool operator!=(container const & v) const
384 {
385 return !(*this == v);
386 }
387 void flip()
388 {
389 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'flip'");
390 m_wrapper.flip();
391 }
392 bool empty() const
393 {
394 return m_wrapper.empty();
395 }
396};
397
398template <uint8_t t_width = 0>
400{
401private:
402 static std::string tmp_file(std::string const & dir)
403 {
404 char tmp_file_name[1024] = {0};
405#ifdef _WIN32
406 auto ret = GetTempFileName(dir.c_str(), "tmp_mapper_file_", 0, tmp_file_name);
407 if (ret == 0)
408 {
409 throw std::runtime_error("could not create temporary file.");
410 }
411#else
412 snprintf(tmp_file_name,
413 sizeof(tmp_file_name),
414 "%s/tmp_mapper_file_%" PRIu64 "_XXXXXX.sdsl",
415 dir.c_str(),
416 util::pid());
417 int fd = mkstemps(tmp_file_name, 5);
418 if (fd == -1)
419 {
420 throw std::runtime_error("could not create temporary file.");
421 }
422 close(fd);
423#endif
424 return std::string(tmp_file_name, strlen(tmp_file_name));
425 }
426
427public:
429 {
430#ifdef MSVC_COMPILER
431 char tmp_dir_name[1024] = {0};
432 auto tmp_dir = GetTempPath(1024, tmp_dir_name);
433 auto file_name = tmp_file(tmp_dir_name);
434#else
435 auto file_name = tmp_file("/tmp");
436#endif
437 return create(file_name);
438 }
440 {
441 auto file_name = tmp_file(config.dir);
442 return create(file_name);
443 }
444 static int_vector_mapper<t_width> create(std::string const & file_name)
445 {
446 // write empty int_vector to init the file
447 int_vector<t_width> tmp_vector;
448 store_to_file(tmp_vector, file_name);
450 }
451};
452
453// creates empty int_vector<> that will not be deleted
454template <uint8_t t_width = 0>
456{
457public:
458 static int_vector_mapper<t_width> create(std::string const & key, cache_config & config)
459 {
460 auto file_name = cache_file_name(key, config);
461 auto tmp = create(file_name);
462 register_cache_file(key, config);
463 return std::move(tmp);
464 }
465 static int_vector_mapper<t_width> create(std::string const & file_name)
466 {
467 // write empty int_vector to init the file
468 int_vector<t_width> tmp_vector;
469 store_to_file(tmp_vector, file_name);
471 }
472 static int_vector_mapper<t_width> create(std::string const & file_name, size_t size, uint8_t int_width = t_width)
473 {
474 // write empty int_vector to init the file
475 int_vector<t_width> tmp_vector(0, 0, int_width);
476 store_to_file(tmp_vector, file_name);
478 mapper.resize(size);
479 return mapper;
480 }
481};
482
483template <std::ios_base::openmode t_mode = std::ios_base::out | std::ios_base::in>
485
486template <uint8_t t_width = 0>
488
489} // namespace sdsl
490
491#endif
bits.hpp contains the sdsl::bits class.
int_vector_mapper(const std::string filename, bool is_plain=false, bool delete_on_close=false)
int_vector_mapper & operator=(int_vector_mapper const &)=delete
auto operator[](size_type const &idx) const -> typename int_vector< t_width >::const_reference
int_vector_mapper(int_vector_mapper const &)=delete
auto cbegin() const -> typename int_vector< t_width >::const_iterator
value_type get_int(size_type idx, const uint8_t len=64) const
bool operator==(int_vector< t_width > const &v) const
std::string file_name() const
void bit_resize(const size_type bit_size)
uint64_t const * data() const
bool operator==(int_vector_mapper const &v) const
bool operator!=(container const &v) const
auto begin() -> typename int_vector< t_width >::iterator
const size_type append_block_size
void width(const uint8_t new_int_width)
void set_int(size_type idx, value_type x, const uint8_t len=64)
int_vector< t_width >::int_width_type width_type
int_vector_mapper(int_vector_mapper &&ivm)
static constexpr uint8_t fixed_int_width
int_vector_mapper & operator=(int_vector_mapper &&ivm)
auto end() const -> typename int_vector< t_width >::const_iterator
auto cend() const -> typename int_vector< t_width >::const_iterator
bool operator==(container const &v) const
auto operator[](size_type const &idx) -> typename int_vector< t_width >::reference
auto end() -> typename int_vector< t_width >::iterator
int_vector< t_width >::size_type size_type
auto begin() const -> typename int_vector< t_width >::const_iterator
int_vector_mapper(std::string const &key, cache_config const &config)
int_vector< t_width >::difference_type difference_type
void resize(const size_type size)
int_vector< t_width >::value_type value_type
A proxy class that acts as a reference to an integer of length len bits in a int_vector.
A generic vector class for integers of width \(w \in [1..64]\).
Definition io.hpp:36
int_vector_trait< t_width >::int_width_type int_width_type
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
value_type get_int(size_type idx, const uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx in the int_vector.
ptrdiff_t difference_type
int_vector_trait< t_width >::const_reference const_reference
int_vector_size_type size_type
bool empty() const noexcept
Equivalent to size() == 0.
int_vector_trait< t_width >::value_type value_type
static size_t read_header(int_vector_size_type &size, int_width_type &int_width, std::istream &in)
Read the size and int_width of a int_vector.
void flip()
Flip all bits of bit_vector.
size_type size() const noexcept
The number of elements in the int_vector.
uint64_t const * data() const noexcept
Pointer to the raw data of the int_vector.
size_type bit_size() const noexcept
The number of bits in the int_vector.
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
void set_int(size_type idx, value_type x, const uint8_t len=64)
Set the bits from position idx to idx+len-1 to the binary representation of integer x.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
bool is_open()
Is the stream open?
Definition sfstream.hpp:249
static void * mmap_file(int fd, uint64_t file_size, std::ios_base::openmode mode)
static int close_file_for_mmap(int fd)
static int mem_unmap(int fd, void *addr, const uint64_t size)
static int open_file_for_mmap(std::string &filename, std::ios_base::openmode mode)
static int truncate_file_mmap(int fd, const uint64_t new_size)
osfstream & seekp(pos_type pos)
Definition sfstream.hpp:125
static int_vector_mapper< t_width > create()
static int_vector_mapper< t_width > create(std::string const &file_name)
static int_vector_mapper< t_width > create(cache_config const &config)
static int_vector_mapper< t_width > create(std::string const &key, cache_config &config)
static int_vector_mapper< t_width > create(std::string const &file_name)
static int_vector_mapper< t_width > create(std::string const &file_name, size_t size, uint8_t int_width=t_width)
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
memory_management.hpp contains two functions for allocating and deallocating memory
Get the size of a file in bytes size_t file_size(std::string const &file)
Definition util.hpp:173
uint64_t pid()
Namespace for the succinct data structure library.
int remove(std::string const &)
Remove a file.
Definition ram_fs.hpp:221
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
Definition io.hpp:688
void register_cache_file(std::string const &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition io.hpp:717
bool store_to_file(T const &v, std::string const &file)
Store a data structure to a file.
Definition io.hpp:874
int_vector ::size_type size(range_type const &r)
Size of a range.
int_vector_mapper< t_width, std::ios_base::in > const read_only_mapper
ram_fs.hpp
sfstream.hpp contains two stream classes which can be used to read/write from/to files or strings.
static constexpr uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition bits.hpp:486
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition bits.hpp:194
Helper class for construction process.
Definition config.hpp:66
std::string dir
Definition config.hpp:70
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.