SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
wt_hutu.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_SDSL_WT_HUTU
10#define INCLUDED_SDSL_WT_HUTU
11
12#include <stddef.h>
13#include <stdint.h>
14#include <vector>
15
16#include <sdsl/int_vector.hpp>
17#include <sdsl/wt_helper.hpp>
18#include <sdsl/wt_pc.hpp>
19
21namespace sdsl
22{
23
24// forward declaration
25struct hutu_shape;
26
28
41template <class t_bitvector = bit_vector,
42 class t_rank = typename t_bitvector::rank_1_type,
43 class t_select = typename t_bitvector::select_1_type,
44 class t_select_zero = typename t_bitvector::select_0_type,
45 class t_tree_strat = byte_tree<>>
47
48// Hu Tucker shape for wt_pc
49template <class t_wt>
51{
52 typedef typename t_wt::size_type size_type;
53 enum
54 {
55 lex_ordered = 1
56 };
57
59 template <class t_element>
60 struct heap_node
61 {
62 t_element * item; // pointer to the represented item
63 heap_node *left, *right, *parent; // pointer to left/right child, parent
64 int64_t rank; // rank of the heap node
66 heap_node(t_element * it = nullptr) : item(it), left(nullptr), right(nullptr), parent(nullptr), rank(0)
67 {}
69 bool operator<(heap_node const & other)
70 {
71 return *item < *(other.item);
72 }
73 };
74
75 // Implementation of a leftist heap as needed in the first phase of
76 // Hu-Tucker Code construction
77 template <class t_element>
78 class l_heap
79 {
80 private:
81 heap_node<t_element> * m_root; // pointer to the root
82
83 // fixes node information after the deletion of elements
84 void fix_node(heap_node<t_element> * item)
85 {
86 if (item != nullptr)
87 {
88 if (!item->left || !item->right)
89 { // if node has only one child
90 // only go on fixing if the node information needs to be changed
91 if (item->rank != 0)
92 {
93 item->rank = 0;
94 if (item->parent)
95 fix_node(item->parent);
96 }
97 }
98 else
99 { // node information has to be adapted
100 int64_t nn = (item->left->rank > item->right->rank) ? item->right->rank : item->left->rank;
101 if (item->rank != nn && item->parent != 0)
102 {
103 item->rank = nn;
104 fix_node(item->parent);
105 }
106 }
107 }
108 }
109
110 // helper function to remove the data structure from memory
111 void free_node(heap_node<t_element> * item)
112 {
113 if (item->left)
114 {
115 free_node(item->left);
116 delete item->left;
117 item->left = nullptr;
118 }
119 if (item->right)
120 {
121 free_node(item->right);
122 delete item->right;
123 item->right = nullptr;
124 }
125 }
126
127 // internal merge function
129 {
130 if (!h1)
131 return h2;
132 if (!h2)
133 return h1;
134 if (*(h1->item) < *(h2->item))
135 return merge1(h1, h2);
136 else
137 return merge1(h2, h1);
138 }
139 // internal merge function
141 {
142 if (!h1->left)
143 { // if h1 has no children, the merge is simple
144 h1->left = h2;
145 h2->parent = h1; // adjust the parent pointer
146 }
147 else
148 {
149 h1->right = merge(h1->right, h2);
150 if (h1->right)
151 {
152 h1->right->parent = h1;
153 }
154
155 if ((h1->left->rank) < (h1->right->rank))
156 {
157 heap_node<t_element> * tmp = h1->left;
158 h1->left = h1->right;
159 h1->right = tmp;
160 }
161 h1->rank = h1->right->rank + 1;
162 }
163 return h1;
164 }
165
166 public:
168 l_heap() : m_root(nullptr)
169 {}
170
172 bool empty() const
173 {
174 return (m_root == nullptr);
175 }
176
178
182 {
183 return m_root;
184 }
185
187
191 {
192 if (m_root == nullptr)
193 return nullptr;
194 if (m_root->left == nullptr)
195 return m_root->right;
196 if (m_root->right == nullptr)
197 return m_root->left;
198
199 if (m_root->left->operator<(*m_root->right))
200 return m_root->left;
201 else
202 return m_root->right;
203 }
204
206
210 {
213 lh.m_root = n;
214 merge(&lh);
215 return n;
216 }
217
220 {
221 heap_node<t_element> * old_root = m_root;
222 m_root = merge(m_root->left, m_root->right);
223 if (m_root)
224 m_root->parent = nullptr;
225 delete old_root;
226 }
227
228 // deletes an arbitrary element from the heap
229 // this function assumes, that item is an element of the heap
231 {
232 if (item != nullptr)
233 {
234 if (m_root == item)
235 { // deleting the root is trivial
236 delete_min();
237 }
238 else
239 {
240 // otherwise we have to adapt the parent node and
241 // the children of item
242 heap_node<t_element> * h1 = merge(item->left, item->right);
243 if (h1)
244 h1->parent = item->parent;
245 if (item == item->parent->left)
246 {
247 item->parent->left = h1;
248 }
249 else if (item == item->parent->right)
250 {
251 item->parent->right = h1;
252 }
253 // fix node information considering rank
254 fix_node(item->parent);
255 delete item; // remove the item from memory
256 }
257 }
258 }
259
260 // public merge function
262 {
263 m_root = merge(m_root, rhs->m_root);
264 rhs->m_root = nullptr;
265 }
266
267 // removes the whole data structure from memory
269 {
270 if (m_root != nullptr)
271 {
272 free_node(m_root);
273 delete m_root;
274 m_root = nullptr;
275 }
276 }
277 };
278
279 // forward declaration of node classes
280 struct ht_node;
281
282 // Master node as used in the first phase of the Hu-Tucker algorithm
283 struct m_node
284 {
285 // min sum of the two min elements of the hpq this node points to
287 int64_t i; // position of the left node in the working sequence
288 int64_t j; // position of the right node in the working sequence
289 // pointer to the corresponding heap element (used for deletion)
291 l_heap<ht_node> * myhpq; // pointer to the hpq
292
293 ht_node * lt; // pointer to the left- and rightmost leafs of the hpq
294 ht_node * rt; // need for merge operations
295
296 m_node() : qel(0), myhpq(0), lt(0), rt(0)
297 {}
298
299 bool operator<(const m_node other)
300 {
301 if (min_sum != other.min_sum)
302 {
303 return min_sum < other.min_sum;
304 }
305 if (i != other.i)
306 {
307 return i < other.i;
308 }
309 return j < other.j;
310 }
311
312 bool operator>(const m_node other)
313 {
314 return other < *this;
315 }
316 };
317
318 // Hu-Tucker node as used in the first phase of the Hu-Tucker algorithm
319 struct ht_node
320 {
321 int64_t pos; // position of the node
322 uint64_t c; // the represented letter
323 size_type w; // frequency of the node
324 bool t; // whether the node is a leaf
325 int64_t level; // level in the tree
326
327 // pointer to the two master nodes
328 // (as a node can belong to up to two hpqs)
330 m_node * mpqr; // only mpql is used for inner nodes
331 // pointer to the two heap nodes (as a node can belong to up to two hpqs)
333 heap_node<ht_node> * qr; // only ql is used for inner nodes
334 ht_node * left; // left child
335 ht_node * right; // right child
336
337 ht_node() : mpql(0), mpqr(0), ql(0), qr(0), left(nullptr), right(nullptr)
338 {}
339
340 bool operator<(ht_node const & other)
341 {
342 if (w != other.w)
343 {
344 return w < other.w;
345 }
346 return pos < other.pos;
347 }
348
349 bool operator>(ht_node const & other)
350 {
351 return other < *this;
352 }
353 };
354
355 template <class t_rac>
356 static void construct_tree(t_rac & C, std::vector<pc_node> & temp_nodes)
357 {
358 // create a leaf for every letter
359 std::vector<ht_node> node_vector;
360 for (size_t i = 0; i < C.size(); i++)
361 {
362 if (C[i])
363 {
364 ht_node n;
365 n.c = (uint64_t)i;
366 n.w = C[i];
367 n.t = true;
368 n.pos = node_vector.size();
369 node_vector.push_back(n);
370 }
371 }
372 if (node_vector.size() == 1)
373 {
374 // special case of an alphabet of size 1:
375 // just instantly create the tree and return it
376 temp_nodes.emplace_back(pc_node(node_vector[0].w, (size_type)node_vector[0].c));
377 return;
378 }
379 size_type sigma = node_vector.size();
380 std::vector<ht_node> T(sigma); // physical leaves
381 std::vector<ht_node *> A(sigma); // the current working sequence
382 // Priority Queues, containing the Huffman Sequences
383 std::vector<l_heap<ht_node>> HPQ(sigma);
384 l_heap<m_node> MPQ; // Master Priority Queue
385
386 // init T, A, HPQs and MPQ
387 T[0] = node_vector[0];
388 A[0] = &T[0];
389
390 // initialization needed for every leaf
391 for (size_type i = 1; i < sigma; i++)
392 {
393 T[i] = node_vector[i];
394 A[i] = &T[i];
395
396 T[i - 1].qr = HPQ[i - 1].insert(&T[i - 1]);
397 T[i].ql = HPQ[i - 1].insert(&T[i]);
398
399 m_node * m = new m_node();
400 m->min_sum = T[i - 1].w + T[i].w;
401 m->i = i - 1;
402 m->j = i;
403 m->lt = &T[i - 1];
404 m->rt = &T[i];
405 m->myhpq = &HPQ[i - 1];
406
407 m->qel = MPQ.insert(m);
408
409 T[i - 1].mpqr = m;
410 T[i].mpql = m;
411 }
412
413 // main action loop
414 for (size_type k = 1; k < sigma; k++)
415 {
416 m_node * m = MPQ.find_min()->item;
417 ht_node * l = A[m->i];
418 ht_node * r = A[m->j];
419 int64_t lpos = m->i;
420 int64_t rpos = m->j;
421
422 l_heap<ht_node> * n_hpq = nullptr;
423 ht_node * n_rt = nullptr;
424 ht_node * n_lt = nullptr;
425
426 // create a new master priority queue
427 m_node * n_m = new m_node();
428 // delete old nodes from all hpqs
429 if (l->t)
430 {
431 if (l->mpql)
432 l->mpql->myhpq->delete_element(l->ql);
433 l->ql = nullptr;
434 if (l->mpqr)
435 l->mpqr->myhpq->delete_element(l->qr);
436 l->qr = nullptr;
437 }
438 else
439 {
440 m->myhpq->delete_element(l->ql);
441 l->ql = nullptr;
442 }
443 if (r->t)
444 {
445 if (r->mpql)
446 r->mpql->myhpq->delete_element(r->ql);
447 l->ql = nullptr;
448
449 if (r->mpqr)
450 r->mpqr->myhpq->delete_element(r->qr);
451 r->qr = nullptr;
452 }
453 else
454 {
455 m->myhpq->delete_element(r->ql);
456 r->ql = nullptr;
457 }
458 // handle the merge of hpqs
459 if (l->t && r->t)
460 {
461 // both nodes are leaves
462 l_heap<ht_node> * h1 = nullptr;
463 l_heap<ht_node> * h2 = nullptr;
464 l_heap<ht_node> * h3 = nullptr;
465 if (l->mpql)
466 {
467 n_lt = l->mpql->lt;
468 if (n_lt == l)
469 n_lt = nullptr;
470 if (n_lt)
471 n_lt->mpqr = n_m;
472
473 h1 = l->mpql->myhpq;
474 h2 = l->mpqr->myhpq;
475
476 h1->merge(h2);
477 MPQ.delete_element(l->mpql->qel);
478 MPQ.delete_element(l->mpqr->qel);
479 delete l->mpql;
480 delete l->mpqr;
481 }
482 else
483 {
484 h1 = l->mpqr->myhpq;
485 h2 = l->mpqr->myhpq;
486 n_lt = nullptr;
487
488 MPQ.delete_element(l->mpqr->qel);
489 delete l->mpqr;
490 }
491 if (r->mpqr)
492 {
493 n_rt = r->mpqr->rt;
494 if (n_rt == r)
495 n_rt = nullptr;
496 if (n_rt)
497 n_rt->mpql = n_m;
498
499 h3 = r->mpqr->myhpq;
500 h1->merge(h3);
501 MPQ.delete_element(r->mpqr->qel);
502 delete r->mpqr;
503
504 n_hpq = h1;
505 if (n_rt)
506 n_rt->mpql = n_m;
507 }
508 else
509 {
510 n_rt = nullptr;
511 n_hpq = h1;
512 }
513 }
514 else if (l->t)
515 { // the left node is a leaf
516 if (l->mpql)
517 {
518 n_lt = l->mpql->lt;
519 if (n_lt)
520 n_lt->mpqr = n_m;
521 n_rt = l->mpqr->rt;
522 if (n_rt)
523 n_rt->mpql = n_m;
524
525 l->mpql->myhpq->merge(l->mpqr->myhpq);
526 n_hpq = l->mpql->myhpq;
527 MPQ.delete_element(l->mpql->qel);
528 MPQ.delete_element(l->mpqr->qel);
529 delete l->mpql;
530 delete l->mpqr;
531 }
532 else
533 {
534 n_lt = nullptr;
535 n_rt = l->mpqr->rt;
536 if (n_rt)
537 n_rt->mpql = n_m;
538
539 n_hpq = l->mpqr->myhpq;
540 MPQ.delete_element(l->mpqr->qel);
541 delete l->mpqr;
542 }
543 }
544 else if (r->t)
545 { // right node is a leaf
546 if (r->mpqr)
547 {
548 n_lt = r->mpql->lt;
549 if (n_lt)
550 n_lt->mpqr = n_m;
551 n_rt = r->mpqr->rt;
552 if (n_rt)
553 n_rt->mpql = n_m;
554
555 r->mpql->myhpq->merge(r->mpqr->myhpq);
556 n_hpq = r->mpql->myhpq;
557 MPQ.delete_element(r->mpql->qel);
558 MPQ.delete_element(r->mpqr->qel);
559 delete r->mpql;
560 delete r->mpqr;
561 }
562 else
563 {
564 n_lt = r->mpql->lt;
565 if (n_lt)
566 n_lt->mpqr = n_m;
567 n_rt = nullptr;
568
569 n_hpq = r->mpql->myhpq;
570 MPQ.delete_element(r->mpql->qel);
571 delete r->mpql;
572 }
573 }
574 else
575 {
576 // merge of two inner nodes
577 // no need to merge hpqs
578 MPQ.delete_element(m->qel);
579
580 n_hpq = m->myhpq;
581 n_lt = m->lt;
582 n_rt = m->rt;
583
584 if (n_lt)
585 n_lt->mpqr = n_m;
586 if (n_rt)
587 n_rt->mpql = n_m;
588
589 delete m;
590 }
591
592 // create a new node with the information gained above
593 ht_node * new_node = new ht_node();
594 new_node->c = ' ';
595 new_node->w = l->w + r->w;
596 new_node->t = false;
597 new_node->pos = lpos;
598 new_node->left = l;
599 new_node->right = r;
600 // insert node to the correct hpq
601 new_node->ql = n_hpq->insert(new_node);
602
603 // update working sequence
604 A[lpos] = new_node;
605 A[rpos] = nullptr;
606 // update information in the new master node and reinsert it to mpq
607 ht_node * tmp_min = n_hpq->find_min()->item;
608 heap_node<ht_node> * tmpsnd = n_hpq->find_snd_min();
609 if (tmpsnd)
610 {
611 ht_node * tmp_snd = n_hpq->find_snd_min()->item;
612 n_m->min_sum = tmp_min->w + tmp_snd->w;
613
614 if (tmp_min->pos < tmp_snd->pos)
615 {
616 n_m->i = tmp_min->pos;
617 n_m->j = tmp_snd->pos;
618 }
619 else
620 {
621 n_m->i = tmp_snd->pos;
622 n_m->j = tmp_min->pos;
623 }
624 n_m->qel = MPQ.insert(n_m);
625 n_m->myhpq = n_hpq;
626 n_m->lt = n_lt;
627 n_m->rt = n_rt;
628 }
629 else
630 {
631 // free the last remaining hpq
632 n_hpq->free_memory();
633 delete n_m;
634 }
635 }
636
637 // level assignment and deletion of unneeded nodes
638 assign_level(A[0], 0);
639
640 // reconstruction phase using the stack algorithm
641 std::vector<ht_node *> stack(sigma, nullptr);
642
643 for (size_type i = 0; i < sigma; i++)
644 {
645 temp_nodes.emplace_back(pc_node(T[i].w, (size_type)T[i].c));
646 T[i].pos = i;
647 }
648
649 int64_t spointer = -1;
650 uint64_t qpointer = 0; // use the Array T as a stack
651 while (qpointer < sigma or spointer >= 1LL)
652 {
653 if (spointer >= 1LL and (stack[spointer]->level == stack[spointer - 1]->level))
654 {
655 ht_node * n_node = new ht_node();
656 n_node->t = false;
657 n_node->left = stack[spointer - 1];
658 n_node->right = stack[spointer];
659 n_node->level = stack[spointer]->level - 1;
660 n_node->w = stack[spointer]->w + stack[spointer - 1]->w;
661 n_node->c = '|';
662
663 n_node->pos = temp_nodes.size();
664 temp_nodes[stack[spointer - 1]->pos].parent = temp_nodes.size();
665 temp_nodes[stack[spointer]->pos].parent = temp_nodes.size();
666 temp_nodes.emplace_back(
667 pc_node(n_node->w, 0, pc_node::undef, stack[spointer - 1]->pos, stack[spointer]->pos));
668
669 if (!stack[spointer - 1]->t)
670 delete stack[spointer - 1];
671 if (!stack[spointer]->t)
672 delete stack[spointer];
673
674 stack[--spointer] = n_node;
675 }
676 else
677 {
678 stack[++spointer] = &T[qpointer++];
679 }
680 }
681 delete stack[0];
682 }
683
684 static void assign_level(ht_node * n, int64_t lvl)
685 {
686 if (n)
687 {
688 n->level = lvl;
689 assign_level(n->left, lvl + 1);
690 assign_level(n->right, lvl + 1);
691
692 if (!n->t)
693 {
694 delete n;
695 }
696 }
697 }
698};
699
701{
702 template <class t_wt>
704};
705
706} // end namespace sdsl
707
708#endif // end file
void merge(l_heap< t_element > *rhs)
Definition wt_hutu.hpp:261
heap_node< t_element > * insert(t_element *x)
Insert an element into the heap.
Definition wt_hutu.hpp:209
void delete_element(heap_node< t_element > *item)
Definition wt_hutu.hpp:230
l_heap()
Default constructor.
Definition wt_hutu.hpp:168
bool empty() const
Indicates if the heap is empty.
Definition wt_hutu.hpp:172
heap_node< t_element > * find_min() const
Get the smallest element.
Definition wt_hutu.hpp:181
void delete_min()
Delete the smallest element in the heap.
Definition wt_hutu.hpp:219
heap_node< t_element > * find_snd_min() const
Get the second smallest element.
Definition wt_hutu.hpp:190
A prefix code-shaped wavelet.
Definition wt_pc.hpp:60
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Node class used by the leftist heap.
Definition wt_hutu.hpp:61
bool operator<(heap_node const &other)
Less then operator.
Definition wt_hutu.hpp:69
heap_node(t_element *it=nullptr)
Constructor.
Definition wt_hutu.hpp:66
heap_node< ht_node > * qr
Definition wt_hutu.hpp:333
bool operator>(ht_node const &other)
Definition wt_hutu.hpp:349
bool operator<(ht_node const &other)
Definition wt_hutu.hpp:340
heap_node< ht_node > * ql
Definition wt_hutu.hpp:332
bool operator<(const m_node other)
Definition wt_hutu.hpp:299
l_heap< ht_node > * myhpq
Definition wt_hutu.hpp:291
bool operator>(const m_node other)
Definition wt_hutu.hpp:312
heap_node< m_node > * qel
Definition wt_hutu.hpp:290
static void construct_tree(t_rac &C, std::vector< pc_node > &temp_nodes)
Definition wt_hutu.hpp:356
t_wt::size_type size_type
Definition wt_hutu.hpp:52
static void assign_level(ht_node *n, int64_t lvl)
Definition wt_hutu.hpp:684
wt_pc.hpp contains a class for the wavelet tree of byte sequences.