8#ifndef INCLUDED_SDSL_CONSTRUCT_LCP
9#define INCLUDED_SDSL_CONSTRUCT_LCP
62template <u
int8_t t_w
idth>
65 static_assert(t_width == 0 or t_width == 8,
66 "construct_lcp_kasai: width must be `0` for integer alphabet and `8` for byte alphabet");
85 for (size_type i = 0, j = 0, sa_1 = 0, l = 0, n = isa_buf.
size(); i < n; ++i)
94 while (text[i + l] == text[j + l])
107 for (size_type i = sa.
size(); i > 1; --i)
109 sa[i - 1] = sa[i - 2];
133template <u
int8_t t_w
idth>
136 static_assert(t_width == 0 or t_width == 8,
137 "construct_lcp_PHI: width must be `0` for integer alphabet and `8` for byte alphabet");
142 size_type n = sa_buf.
size();
154 for (size_type i = 0, sai_1 = 0; i < n; ++i)
156 size_type sai = sa_buf[i];
167 for (size_type i = 0, l = 0; i < n - 1; ++i)
169 size_type phii = plcp[i];
170 while (text[i + l] == text[phii + l])
177 max_l = std::max(max_l, l);
182 uint8_t lcp_width =
bits::hi(max_l) + 1;
186 size_type buffer_size = 1000000;
190 for (size_type i = 1; i < n; ++i)
192 size_type sai = sa_buf[i];
193 lcp_buf[i] = plcp[sai];
220 size_type n = sa_buf.
size();
227 const uint8_t log_q = 6;
228 const uint32_t q = 1 << log_q;
234 for (size_type i = 0, sai_1 = 0; i < n; ++i)
237 size_type sai = sa_buf[i];
238 if ((sai & modq) == 0)
240 if ((sai >> log_q) >= plcp.
size())
245 plcp[sai >> log_q] = sai_1;
253 for (size_type i = 0, j, k, l = 0; i < plcp.
size(); ++i)
257 while (text[j + l] == text[k + l])
270 size_type buffer_size = 4000000;
277 for (size_type i = 0, sai_1 = 0, l = 0, sai = 0, iq = 0; i < n; ++i)
281 if ((sai & modq) == 0)
283 lcp_out_buf[i] = l = plcp[sai >> log_q];
288 l = plcp[sai >> log_q];
293 while (text[sai + l] == text[sai_1 + l])
299 for (j = 0; j < l; ++j)
301 if (text[sai + j] != text[sai_1 + j])
303 std::cout <<
"lcp[" << i <<
"]=" << l <<
" is two big! " << j <<
" is right!"
304 <<
" sai=" << sai << std::endl;
305 if ((sai & modq) != 0)
306 std::cout <<
" plcp[sai>>log_q]=" << plcp[sai >> log_q] <<
" sai-iq=" << sai - iq <<
" sai=" << sai
307 <<
" sai-iq=" << sai - iq << std::endl;
341#ifdef STUDY_INFORMATIONS
343 size_type matches = 0;
344 size_type comps2 = 0;
349 const size_type n = sa_buf.
size();
350 const size_type m = 254;
359 size_type cnt_c[257] = {0};
360 size_type cnt_cc[257] = {0};
361 size_type cnt_cc2[257] = {0};
362 size_type omitted_c[257] = {0};
363 size_type prev_occ_in_bwt[256] = {0};
364 for (size_type i = 0; i < 256; ++i)
365 prev_occ_in_bwt[i] = (size_type)-1;
366 unsigned char alphabet[257] = {0};
370 size_type m_char_count[2] = {0};
371 uint8_t m_chars[2][256] = {{0}, {0}};
380 for (size_type i = 0; i < n; ++i)
382 ++cnt_c[text[i] + 1];
384 for (
int i = 1; i < 257; ++i)
388 alphabet[sigma++] = (
unsigned char)(i - 1);
390 cnt_cc[i] = cnt_c[i] + cnt_cc[i - 1];
392 alphabet[sigma] =
'\0';
395 size_type sai_1 = sa_buf[0];
396 uint8_t bwti_1 = bwt_buf[0];
397 lcp_sml[cnt_cc[bwti_1]++] = 0;
398 prev_occ_in_bwt[bwti_1] = 0;
399 ++omitted_c[alphabet[0]];
406 size_type rmq_end = 3;
408 const size_type m_mod2 = m % 2;
409 uint8_t cur_c = alphabet[1];
410 size_type big_val = 0;
411 for (size_type i = 1, sai, cur_c_idx = 1, cur_c_cnt = cnt_c[alphabet[1] + 1]; i < n; ++i, --cur_c_cnt)
413 uint8_t bwti = bwt_buf[i];
415 size_type lf = cnt_cc[bwti];
418 if (cur_c_cnt < sigma)
420 cur_c_cnt = cnt_c[(cur_c = alphabet[++cur_c_idx]) + 1];
424 if (i >= cnt_cc[cur_c])
426 if (bwti == bwti_1 and lf < i)
428 l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0;
431 l += (text[sai_1 + m] == text[sai + m]);
432#ifdef STUDY_INFORMATIONS
433 if ((sai_1 ^ sai) >> 6)
442 l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0;
443#ifdef STUDY_INFORMATIONS
444 if ((sai_1 ^ sai) >> 6)
447 while (text[sai_1 + l] == text[sai + l] and l < m + 1)
450#ifdef STUDY_INFORMATIONS
464 if (i > 10000 and i < 10500 and big_val > 3000)
467 util::clear(lcp_sml);
475 size_type j = rmq_end;
476 while (x <= rmq_stack[j])
478 rmq_stack[++j] = i + 1;
485 size_type x_pos = prev_occ_in_bwt[bwti] + 2;
487 while (x_pos <= rmq_stack[j])
490 rmq_stack[j + 3] - (rmq_stack[j + 3] == m + 2);
501 prev_occ_in_bwt[bwti] = i;
509 if (n > 1000 and nn > 5 * (n / 6))
511 util::clear(lcp_sml);
517#ifdef STUDY_INFORMATIONS
518 std::cout <<
"# n=" << n <<
" nn=" << nn <<
" nn/n=" << ((double)nn) / n << std::endl;
534 for (size_type i = 0; i < n; ++i)
536 if (lcp_sml_buf[i] >= m)
543 cnt_cc2[0] = cnt_cc[0] = 0;
544 for (size_type i = 1, omitted_sum = 0; i < 257; ++i)
546 cnt_cc[i] = cnt_c[i] + cnt_cc[i - 1];
547 omitted_sum += omitted_c[i - 1];
548 cnt_cc2[i] = cnt_cc[i] - omitted_sum;
552 for (size_type i = 0, i2 = 0; i < n; ++i)
554 uint8_t b = bwt_buf[i];
555 size_type lf_i = cnt_cc[b];
560 lcp_big[i2] = cnt_cc2[b];
586 for (size_type i = 0, i2 = 0; i < n; ++i)
588 uint8_t b = bwt_buf[i];
589 if (lcp_sml_buf[i] >= m)
592 shift_bwt2[i2] = b_1;
612 size_type char_ex[256];
613 for (size_type i = 0; i < 256; ++i)
615 size_type char_occ = 0;
616 size_type m_mod2 = m2 % 2, mm1_mod2 = (m2 + 1) % 2;
617 while (m_char_count[m_mod2] > 0)
622 mm1_mod2 = (m2 + 1) % 2, m_mod2 = m2 % 2;
623 m_char_count[m_mod2] = 0;
625 std::sort(m_chars[mm1_mod2],
626 m_chars[mm1_mod2] + m_char_count[mm1_mod2]);
628 for (size_type mc = 0; mc < m_char_count[mm1_mod2]; ++mc)
630 tLI & mm1_mc_list = m_list[mm1_mod2][m_chars[mm1_mod2][m_char_count[mm1_mod2] - 1 - mc]];
632 while (!mm1_mc_list.empty())
634 size_type i = mm1_mc_list.
front();
635 mm1_mc_list.pop_front();
637 for (size_type k = i; todo2[k]; --k)
639#ifdef STUDY_INFORMATIONS
642 uint8_t b = shift_bwt2[k];
651 for (size_type k = i; todo2[k] and char_occ; ++k)
653#ifdef STUDY_INFORMATIONS
659 size_type p = lcp_big[k];
681 const size_type buffer_size = 1000000;
690 lcp_big_buf.
width());
691 for (size_type i = 0, i2 = 0; i < n; ++i)
693 size_type l = lcp_sml_buf[i];
705#ifdef STUDY_INFORMATIONS
706 std::cout <<
"# racs: " << racs << std::endl;
707 std::cout <<
"# matches: " << matches << std::endl;
708 std::cout <<
"# comps2: " << comps2 << std::endl;
738 const size_type n = sa_buf.
size();
739 const size_type m = 254;
748 size_type cnt_c[257] = {0};
749 size_type cnt_cc[257] = {0};
750 size_type omitted_c[257] = {0};
751 size_type prev_occ_in_bwt[256] = {0};
752 for (size_type i = 0; i < 256; ++i)
753 prev_occ_in_bwt[i] = (size_type)-1;
754 unsigned char alphabet[257] = {0};
764 for (size_type i = 0; i < n; ++i)
766 ++cnt_c[text[i] + 1];
768 for (
int i = 1; i < 257; ++i)
772 alphabet[sigma++] = (
unsigned char)(i - 1);
774 cnt_cc[i] = cnt_c[i] + cnt_cc[i - 1];
776 alphabet[sigma] =
'\0';
779 size_type sai_1 = sa_buf[0];
780 uint8_t bwti_1 = bwt_buf[0];
781 lcp_sml[cnt_cc[bwti_1]++] = 0;
782 prev_occ_in_bwt[bwti_1] = 0;
783 ++omitted_c[alphabet[0]];
790 size_type rmq_end = 3;
792 uint8_t cur_c = alphabet[1];
793 for (size_type i = 1, sai, cur_c_idx = 1, cur_c_cnt = cnt_c[alphabet[1] + 1]; i < n; ++i, --cur_c_cnt)
795 uint8_t bwti = bwt_buf[i];
797 size_type lf = cnt_cc[bwti];
800 if (cur_c_cnt < sigma)
802 cur_c_cnt = cnt_c[(cur_c = alphabet[++cur_c_idx]) + 1];
806 if (i >= cnt_cc[cur_c])
808 if (bwti == bwti_1 and lf < i)
810 l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0;
813 l += (text[sai_1 + m] == text[sai + m]);
820 l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0;
821 while (text[sai_1 + l] == text[sai + l] and l < m + 1)
835 size_type j = rmq_end;
836 while (x <= rmq_stack[j])
838 rmq_stack[++j] = i + 1;
845 size_type x_pos = prev_occ_in_bwt[bwti] + 2;
847 while (x_pos <= rmq_stack[j])
850 rmq_stack[j + 3] - (rmq_stack[j + 3] == m + 2);
859 prev_occ_in_bwt[bwti] = i;
874 size_type sa_n_1 = 0;
879 for (size_type i = 0; i < n; ++i)
881 if (lcp_sml_buf[i] > m)
886 sa_n_1 = sa_buf[n - 1];
890 const size_type bot = sa_n_1;
896 for (size_type i = 0, sai_1 = 0; i < n; ++i)
898 uint8_t b = bwt_buf[i];
899 size_type sai = sa_buf[i];
900 if (lcp_sml_buf[i] > m and b != b_1)
902 phi[todo_rank(sai)] = sai_1;
910 for (size_type i = 0, ii = 0, l = m + 1, p = 0; i < n and ii < nn; ++i)
914 if (i > 0 and todo[i - 1])
918 if ((p = phi[ii]) != bot)
920 while (text[i + l] == text[p + l])
931 for (size_type i = 0, ii = 0; i < n and ii < nn; ++i)
933 if (lcp_sml_buf[i] > m)
935 lcp_big[ii++] = phi[todo_rank(sa_buf[i])];
946 const size_type buffer_size = 1000000;
955 lcp_big_buf.
width());
957 for (size_type i = 0, i2 = 0; i < n; ++i)
959 size_type l = lcp_sml_buf[i];
967 lcp_big_buf.
close(
true);
968 lcp_sml_buf.
close(
true);
990template <
typename t_wt = wt_huff<bit_vector, rank_support_v<>, select_support_scan<1>, select_support_scan<0>>>
1000 uint64_t n = wt_bwt.size();
1005 size_type lcp_value = 0;
1006 size_type lcp_value_offset = 0;
1007 size_type phase = 0;
1009 size_type intervals = 0;
1010 size_type intervals_new = 0;
1012 std::queue<size_type> q;
1013 std::vector<bit_vector> dict(2);
1014 size_type source = 0, target = 1;
1015 bool queue_used =
true;
1016 size_type use_queue_and_wt = n / 2048;
1020 std::vector<unsigned char> cs(wt_bwt.sigma);
1021 std::vector<size_type> rank_c_i(wt_bwt.sigma);
1022 std::vector<size_type> rank_c_j(wt_bwt.sigma);
1026 size_type bb = (n * 20 -
size_in_bytes(wt_bwt) * 8 * 1.25 - 5 * n)
1032 bb = std::min(bb, (size_type)8);
1034 size_type lcp_value_max = (1ULL << bb) - 1;
1035 size_type space_in_bit_for_lcp = n * bb;
1037#ifdef STUDY_INFORMATIONS
1038 std::cout <<
"# l=" << n <<
" b=" << (int)bb <<
" lcp_value_max=" << lcp_value_max
1039 <<
" size_in_bytes(wt_bwt<v,bs,bs>)=" <<
size_in_bytes(wt_bwt) << std::endl;
1050 std::vector<size_type> C;
1058 index_done[0] =
true;
1060 for (size_type i = 0; i < quantity; ++i)
1062 unsigned char c = cs[i];
1063 size_type a_new = C[c] + rank_c_i[i];
1064 size_type b_new = C[c] + rank_c_j[i];
1067 if (!index_done[b_new])
1070 partial_lcp[b_new] = lcp_value;
1071 index_done[b_new] =
true;
1083 if (intervals < use_queue_and_wt && !queue_used)
1086 util::clear(dict[target]);
1091 while (b2 < dict[source].
size())
1093 q.push((a2 - 1) >> 1);
1099 util::clear(dict[source]);
1102 if (intervals >= use_queue_and_wt && queue_used)
1105 dict[source].resize(2 * (n + 1));
1111 dict[source][(q.front() << 1) + 1] = 1;
1113 dict[source][(q.front() << 1)] = 1;
1116 dict[target].resize(2 * (n + 1));
1122 if (intervals < use_queue_and_wt)
1129 size_type a = q.
front();
1131 size_type b = q.
front();
1136 for (size_type i = 0; i < quantity; ++i)
1138 unsigned char c = cs[i];
1139 size_type a_new = C[c] + rank_c_i[i];
1140 size_type b_new = C[c] + rank_c_j[i];
1143 if (!index_done[b_new] and phase == 0)
1145 partial_lcp[b_new] = lcp_value;
1146 index_done[b_new] =
true;
1152 else if (!index_done[b_new])
1154 size_type insert_pos = b_new - ds_rank_support.
rank(b_new);
1155 if (!partial_lcp[insert_pos])
1157 partial_lcp[insert_pos] = lcp_value - lcp_value_offset;
1166 intervals = intervals_new;
1177 while (b2 < dict[source].
size())
1179 interval_symbols(wt_bwt, ((a2 - 1) >> 1), (b2 >> 1), quantity, cs, rank_c_i, rank_c_j);
1180 for (size_type i = 0; i < quantity; ++i)
1182 unsigned char c = cs[i];
1183 size_type a_new = C[c] + rank_c_i[i];
1184 size_type b_new = C[c] + rank_c_j[i];
1186 if (!index_done[b_new] and phase == 0)
1188 partial_lcp[b_new] = lcp_value;
1189 index_done[b_new] =
true;
1191 dict[target][(a_new << 1) + 1] = 1;
1192 dict[target][(b_new << 1)] = 1;
1195 else if (!index_done[b_new])
1197 size_type insert_pos = b_new - ds_rank_support.
rank(b_new);
1198 if (!partial_lcp[insert_pos])
1200 partial_lcp[insert_pos] = lcp_value - lcp_value_offset;
1202 dict[target][(a_new << 1) + 1] = 1;
1203 dict[target][(b_new << 1)] = 1;
1212 std::swap(source, target);
1216 if (lcp_value >= lcp_value_max)
1221 insert_lcp_values(partial_lcp, index_done, lcp_file, lcp_value, lcp_value_offset);
1229 util::init_support(ds_rank_support, &index_done);
1232 lcp_value_offset = lcp_value_max - 1;
1233 size_type remaining_lcp_values = index_done.
size() - ds_rank_support.
rank(index_done.
size());
1235 uint8_t int_width_new =
1236 std::min(space_in_bit_for_lcp / remaining_lcp_values, (size_type)
bits::hi(n - 1) + 1);
1237 lcp_value_max = lcp_value_offset + (1ULL << int_width_new);
1238#ifdef STUDY_INFORMATIONS
1239 std::cout <<
"# l=" << remaining_lcp_values <<
" b=" << (int)int_width_new
1240 <<
" lcp_value_max=" << lcp_value_max << std::endl;
1243 partial_lcp.
width(int_width_new);
1244 partial_lcp.
resize(remaining_lcp_values);
1257 insert_lcp_values(partial_lcp, index_done, lcp_file, lcp_value, lcp_value_offset);
1284template <
typename t_wt = wt_huff<bit_vector, rank_support_v<>, select_support_scan<1>, select_support_scan<0>>>
1290 size_type buffer_size = 1000000;
1291 size_type lcp_value = 0;
1304 size_type intervals = 0;
1305 size_type intervals_new = 0;
1307 std::queue<size_type> q;
1308 std::vector<bit_vector> dict(2);
1309 size_type source = 0, target = 1;
1310 bool queue_used =
true;
1311 size_type use_queue_and_wt = n / 2048;
1315 std::vector<unsigned char> cs(wt_bwt.sigma);
1316 std::vector<size_type> rank_c_i(wt_bwt.sigma);
1317 std::vector<size_type> rank_c_j(wt_bwt.sigma);
1320 bool new_lcp_value =
false;
1321 uint8_t int_width =
bits::hi(n) + 2;
1326 size_type idx_out_buf = 0;
1330 std::vector<size_type> C;
1337 lcp_positions_buf[idx_out_buf++] = 0;
1340 lcp_positions_buf[idx_out_buf - 1] = lcp_positions_buf[idx_out_buf - 1] + n;
1341 new_lcp_value =
false;
1343 index_done[0] =
true;
1347 for (size_type i = 0; i < quantity; ++i)
1349 unsigned char c = cs[i];
1350 size_type a_new = C[c] + rank_c_i[i];
1351 size_type b_new = C[c] + rank_c_j[i];
1354 if (!index_done[b_new])
1359 lcp_positions_buf[idx_out_buf++] = b_new;
1361 index_done[b_new] =
true;
1370 new_lcp_value =
true;
1375 if (intervals < use_queue_and_wt && !queue_used)
1378 util::clear(dict[target]);
1383 while (b2 < dict[source].
size())
1385 q.push((a2 - 1) >> 1);
1391 util::clear(dict[source]);
1394 if (intervals >= use_queue_and_wt && queue_used)
1397 dict[source].resize(2 * (n + 1));
1402 dict[source][(q.front() << 1) + 1] = 1;
1404 dict[source][(q.front() << 1)] = 1;
1407 dict[target].resize(2 * (n + 1));
1412 if (intervals < use_queue_and_wt)
1419 size_type a = q.
front();
1421 size_type b = q.
front();
1426 for (size_type i = 0; i < quantity; ++i)
1428 unsigned char c = cs[i];
1429 size_type a_new = C[c] + rank_c_i[i];
1430 size_type b_new = C[c] + rank_c_j[i];
1433 if (!index_done[b_new])
1436 lcp_positions_buf[idx_out_buf++] = b_new;
1440 lcp_positions_buf[idx_out_buf - 1] = lcp_positions_buf[idx_out_buf - 1] + n;
1441 new_lcp_value =
false;
1443 index_done[b_new] =
true;
1452 intervals = intervals_new;
1462 while (b2 < dict[source].
size())
1464 interval_symbols(wt_bwt, ((a2 - 1) >> 1), (b2 >> 1), quantity, cs, rank_c_i, rank_c_j);
1465 for (size_type i = 0; i < quantity; ++i)
1467 unsigned char c = cs[i];
1468 size_type a_new = C[c] + rank_c_i[i];
1469 size_type b_new = C[c] + rank_c_j[i];
1471 if (!index_done[b_new])
1474 lcp_positions_buf[idx_out_buf++] = b_new;
1478 lcp_positions_buf[idx_out_buf - 1] = lcp_positions_buf[idx_out_buf - 1] + n;
1479 new_lcp_value =
false;
1481 index_done[b_new] =
true;
1483 dict[target][(a_new << 1) + 1] = 1;
1484 dict[target][(b_new << 1)] = 1;
1492 std::swap(source, target);
1496 new_lcp_value =
true;
1499 lcp_positions_buf.
close();
1507 uint8_t int_width =
bits::hi(lcp_value + 1) + 1;
1512 size_type number_of_values = ((n / ((int_width - 1ULL) / 8 + 1) + 16) & (~(0x7ULL)));
1516 number_of_values * int_width / 8,
1518 number_of_values = lcp_array.
buffersize() * 8 / int_width;
1520 for (size_type position_begin = 0, position_end = number_of_values; position_begin < n and number_of_values > 0;
1521 position_begin = position_end, position_end += number_of_values)
1523#ifdef STUDY_INFORMATIONS
1524 std::cout <<
"# number_of_values=" << number_of_values <<
" fill lcp_values with " << position_begin
1525 <<
" <= position <" << position_end <<
", each lcp-value has " << (int)int_width
1526 <<
" bit, lcp_value_max=" << lcp_value <<
" n=" << n << std::endl;
1529 for (size_type i = 0; i < n; ++i)
1531 size_type position = lcp_positions[i];
1537 if (position_begin <= position and position < position_end)
1539 lcp_array[position] = lcp_value;
1546 lcp_positions.
close(
true);
bits.hpp contains the sdsl::bits class.
uint64_t size() const
Returns the number of elements currently stored.
void close(bool remove_file=false)
Close the int_vector_buffer.
uint64_t buffersize() const
Returns the buffersize in bytes.
uint8_t width() const
Returns the width of the integers which are accessed via the [] operator.
A generic vector class for integers of width .
int_vector_size_type size_type
reference front() noexcept
Returns first element.
size_type size() const noexcept
The number of elements in the int_vector.
void shrink_to_fit()
Free unused allocated memory.
void resize(const size_type size)
Resize the int_vector in terms of elements.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
static mm_event_proxy event(std::string const &name)
A rank structure proposed by Sebastiano Vigna.
size_type rank(size_type idx) const
Answers rank queries for the supported bit_vector.
construct_isa.hpp contains a space and time efficient construction method for the inverse suffix arra...
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
memory_tracking.hpp contains two function for allocating and deallocating memory
constexpr char KEY_TEXT[]
Get the smallest position f$i geq idx f$ where a bit is set t_int_vec::size_type next_bit(t_int_vec const &v, uint64_t idx)
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Namespace for the succinct data structure library.
bool store_to_cache(T const &v, std::string const &key, cache_config &config, bool add_type_hash=false)
Stores the object v as a resource in the cache.
void push_back_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t(&m_chars)[256], size_type_class &m_char_count)
void construct_lcp_kasai(cache_config &config)
Construct the LCP array for text over byte- or integer-alphabet.
void construct_lcp_bwt_based(cache_config &config)
Construct the LCP array out of the BWT (only for byte strings)
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
void insert_lcp_values(int_vector<> &partial_lcp, bit_vector &index_done, std::string lcp_file, uint64_t max_lcp_value, uint64_t lcp_value_offset)
Merges a partial LCP array into the LCP array on disk.
void construct_lcp_PHI(cache_config &config)
Construct the LCP array for text over byte- or integer-alphabet.
void register_cache_file(std::string const &key, cache_config &config)
Register the existing resource specified by the key to the cache.
T::size_type size_in_bytes(T const &t)
Get the size of a data structure in bytes.
void create_C_array(std::vector< uint64_t > &C, tWT const &wt)
bool load_from_cache(T &v, std::string const &key, cache_config const &config, bool add_type_hash=false)
void construct(t_index &idx, std::string file, uint8_t num_bytes=0, bool move_input=false)
void construct_lcp_semi_extern_PHI(cache_config &config)
Construct the LCP array (only for byte strings)
void construct_lcp_goPHI(cache_config &config)
Construct the LCP array (only for byte strings)
void construct_isa(cache_config &config)
void construct_lcp_bwt_based2(cache_config &config)
Construct the LCP array out of the BWT (only for byte strings)
void interval_symbols(t_wt const &wt, typename t_wt::size_type i, typename t_wt::size_type j, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j)
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
bool store_to_file(T const &v, std::string const &file)
Store a data structure to a file.
int_vector ::size_type size(range_type const &r)
Size of a range.
void construct_lcp_go(cache_config &config)
Construct the LCP array (only for byte strings)
std::list< int_vector<>::size_type > tLI
void push_front_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t(&m_chars)[256], size_type_class &m_char_count)
rank_support_v.hpp contains rank_support_v.
select_support_scan.hpp contains classes that support a sdsl::bit_vector with linear time select.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
static constexpr uint64_t lo_unset[65]
lo_unset[i] is a 64-bit word with the i least significant bits not set and the high bits set.
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Helper class for construction process.
Helper classes to transform width=0 and width=8 to corresponding text key.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_huff.hpp contains a class for a Huffman shaped wavelet tree over byte sequences.