00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 # ifndef HUFFMAN_H
00045 # define HUFFMAN_H
00046
00047 # include <memory>
00048 # include <autosprintf.h>
00049 # include <tpl_treap.H>
00050 # include <tpl_binHeap.H>
00051 # include <tpl_dynMapTree.H>
00052 # include <bitArray.H>
00053
00054 namespace Aleph {
00055
00056 class Huffman_Node;
00057
00058 typedef DynMapTree<Treap_Vtl, string, Huffman_Node *> Symbol_Map;
00059 typedef BinNode< Aleph::pair<string, size_t> > Freq_Node;
00060
00061 struct Huffman_Node : public BinHeap<size_t>::Node
00062 {
00063 BinNode<string> * bin_node;
00064
00065 Freq_Node * freq_node;
00066
00067 Huffman_Node() : BinHeap<size_t>::Node(0), bin_node(NULL), freq_node(NULL)
00068 {
00069
00070 }
00071
00072 Huffman_Node(BinNode<string> * node)
00073 : BinHeap<size_t>::Node(0), bin_node(node), freq_node(NULL)
00074 {
00075
00076 }
00077
00078 ~Huffman_Node() { }
00079
00080 };
00081 typedef BinHeap<size_t> Huffman_Heap;
00082 static inline const size_t & get_freq(Huffman_Node * huffman_node)
00083
00084 {
00085 return huffman_node->get_key();
00086 }
00087
00088 static inline void increase_freq(Huffman_Node * huffman_node)
00089
00090 {
00091 huffman_node->get_key()++;
00092 }
00093
00094 static inline void set_freq(Huffman_Node * huffman_node, const size_t & freq)
00095
00096 {
00097 huffman_node->get_key() = freq;
00098 }
00099
00100 typedef DynMapTree<Treap_Vtl, string, BitArray> Code_Map;
00101 static inline bool is_leaf(BinNode<string> * p)
00102 {
00103 return LLINK(p) == NULL and RLINK(p) == NULL;
00104 }
00105
00111 class Huffman_Encoder_Engine
00112 {
00113 BinNode<string> * root;
00114 Huffman_Heap heap;
00115 Symbol_Map symbol_map;
00116 Code_Map code_map;
00117
00118 Freq_Node * freq_root;
00119
00120 string end_symbol;
00121 size_t text_len;
00122 void build_prefix_encoding(BinNode<string> * root,
00123 BitArray & array,
00124 const size_t & len)
00125 {
00126 if (is_leaf(root))
00127 {
00128 string & str = root->get_key();
00129
00130 code_map.insert(str, BitArray(array, len));
00131
00132 return;
00133 }
00134
00135 array[len] = 0;
00136 build_prefix_encoding(LLINK(root), array, len + 1);
00137
00138 array[len] = 1;
00139 build_prefix_encoding(RLINK(root), array, len + 1);
00140 }
00141 void build_encoding_map()
00142 {
00143
00144 I(symbol_map.size() > 0);
00145
00146 if (root == NULL)
00147 throw domain_error("Huffman encoding tree has not been generated");
00148
00149 const size_t h = computeHeightRec(root);
00150 BitArray array(h * symbol_map.size());
00151 symbol_map.empty();
00152 build_prefix_encoding(root, array, 0);
00153 }
00154 bool test_end(const string & str) const
00155 {
00156 if (end_symbol == "NO-END")
00157 return false;
00158
00159 return end_symbol == str;
00160 }
00161 void update_freq(const string & str)
00162 {
00163
00164 if (root != NULL)
00165 throw domain_error("Huffman encoding tree has already been generated");
00166
00167 if (test_end(str))
00168 throw domain_error("End symbol has already been inserted");
00169
00170 Huffman_Node * huffman_node = NULL;
00171
00172 Huffman_Node ** huffman_node_ptr = symbol_map.test(str);
00173
00174 if (huffman_node_ptr == NULL)
00175 {
00176 auto_ptr<BinNode<string> > bin_node_auto ( new BinNode<string> (str) );
00177
00178 huffman_node = static_cast<Huffman_Node*>
00179 (heap.insert(new Huffman_Node(bin_node_auto.get())));
00180
00181 symbol_map.insert(str, huffman_node);
00182
00183 bin_node_auto.release();
00184 }
00185 else
00186 huffman_node = *huffman_node_ptr;
00187
00188 increase_freq(huffman_node);
00189
00190 heap.update(huffman_node);
00191 }
00192 static void append_code(BitArray & bit_stream, size_t & bit_stream_len,
00193 const BitArray & symbol_code)
00194 {
00195 const size_t symbol_code_size = symbol_code.size();
00196
00197 if (bit_stream_len + symbol_code_size > bit_stream.size())
00198
00199 {
00200
00201 bit_stream.resize(5 * (bit_stream_len + symbol_code_size) / 4);
00202
00203 I(bit_stream_len + symbol_code_size < bit_stream.size());
00204 }
00205
00206
00207 for (int i = 0; i < symbol_code_size; i++)
00208 bit_stream[bit_stream_len++] = symbol_code[i];
00209 }
00210 public:
00212 Huffman_Encoder_Engine()
00213 : root(NULL), freq_root(NULL), end_symbol("NO-END"), text_len(0)
00214 {
00215
00216 }
00218 BinNode<string> * get_root()
00219 {
00220 if (root == NULL)
00221 throw std::domain_error("Huffman tree has not been generated");
00222
00223 return root;
00224 }
00241 BinNode<string> * generate_huffman_tree(
00242
00243 const bool & with_freqs = false
00244
00245 )
00246 {
00247 while (heap.size() > 1)
00248 {
00249 Huffman_Node * l_huffman_node =
00250 static_cast <Huffman_Node *> (heap.getMin());
00251
00252 Huffman_Node * r_huffman_node =
00253 static_cast <Huffman_Node *> (heap.getMin());
00254 BinNode <string> * bin_node = new BinNode <string>;
00255 Huffman_Node * huffman_node = new Huffman_Node (bin_node);
00256
00257 LLINK(bin_node) = l_huffman_node->bin_node;
00258 RLINK(bin_node) = r_huffman_node->bin_node;
00259
00260 const size_t new_freq = get_freq(l_huffman_node) + get_freq(r_huffman_node);
00261
00262 Aleph::set_freq(huffman_node, new_freq);
00263
00264 if (with_freqs)
00265 {
00266 Freq_Node *& l_freq_node = l_huffman_node->freq_node;
00267
00268 if (l_freq_node == NULL)
00269 {
00270 l_freq_node = new Freq_Node;
00271 l_freq_node->get_key().first =
00272 l_huffman_node->bin_node->get_key();
00273 l_freq_node->get_key().second = l_huffman_node->get_key();
00274 }
00275
00276 Freq_Node *& r_freq_node = r_huffman_node->freq_node;
00277
00278 if (r_freq_node == NULL)
00279 {
00280 r_freq_node = new Freq_Node;
00281 r_freq_node->get_key().first =
00282 r_huffman_node->bin_node->get_key();
00283 r_freq_node->get_key().second = r_huffman_node->get_key();
00284 }
00285
00286 const string str = gnu::autosprintf ("%d", new_freq);
00287
00288 Freq_Node *& freq_node = huffman_node->freq_node;
00289 freq_node = new Freq_Node;
00290 freq_node->get_key().first = str;
00291 freq_node->get_key().second = huffman_node->get_key();
00292 LLINK(freq_node) = l_freq_node;
00293 RLINK(freq_node) = r_freq_node;
00294 }
00295
00296 delete l_huffman_node;
00297 delete r_huffman_node;
00298
00299 heap.insert(huffman_node);
00300 }
00301
00302
00303 Huffman_Node * huffman_root = static_cast <Huffman_Node *> (heap.getMin());
00304
00305 root = huffman_root->bin_node;
00306
00307 if (with_freqs)
00308 freq_root = huffman_root->freq_node;
00309
00310 delete huffman_root;
00311
00312 build_encoding_map();
00313
00314 return root;
00315 }
00317 Freq_Node * get_freq_root()
00318 {
00319 if (freq_root == NULL)
00320 throw std::domain_error("Huffman tree has not been generated");
00321
00322 return freq_root;
00323 }
00333 void set_freq(const string & str, const size_t & freq)
00334
00335 {
00336 if (root != NULL)
00337 throw domain_error("Huffman encoding tree has already been generated");
00338
00339 if (test_end(str))
00340 throw domain_error("End symbol has already been inserted");
00341
00342
00343 Huffman_Node ** huffman_node_ptr = symbol_map.test(str);
00344
00345 if (huffman_node_ptr != NULL)
00346 throw std::domain_error
00347 (gnu::autosprintf("Frequency for symbol %s has already set",
00348 str.c_str()));
00349
00350 auto_ptr<BinNode<string> > bin_node_auto ( new BinNode<string> (str) );
00351
00352 Huffman_Node * huffman_node = new Huffman_Node(bin_node_auto.get());
00353
00354 Aleph::set_freq(huffman_node, freq);
00355
00356 heap.insert(huffman_node);
00357
00358 symbol_map.insert(str, huffman_node);
00359
00360 bin_node_auto.release();
00361 }
00362
00363 private:
00364
00365 static const size_t Max_Token_Size = 256;
00366
00367 public:
00368
00381 void read_input(char * input
00382
00383 , const bool & with_freqs = false
00384
00385 )
00386
00387 {
00388 if (root != NULL)
00389 throw domain_error("Huffman encoding tree has already been generated");
00390
00391 char * curr_stream = input;
00392 char curr_token[Max_Token_Size];
00393 curr_token[1] = '\0';
00394
00395 while (*curr_stream != '\0')
00396 {
00397 curr_token[0] = *curr_stream++;
00398
00399 update_freq(curr_token);
00400 text_len++;
00401 }
00402
00403 set_end_of_stream("");
00404 generate_huffman_tree(with_freqs);
00405 }
00418 void read_input(ifstream & input, const bool & with_freqs = false)
00419 {
00420 if (root != NULL)
00421 throw domain_error("Huffman encoding tree has already been generated");
00422
00423 char curr_token[2];
00424 curr_token[0] = curr_token[1] = '\0';
00425
00426 while (not input.eof())
00427 {
00428 input.read(curr_token, 1);
00429
00430 update_freq(curr_token);
00431 text_len++;
00432 }
00433
00434 set_end_of_stream("");
00435 generate_huffman_tree(with_freqs);
00436 }
00437
00439 void set_end_of_stream(const string & str)
00440 {
00441 if (test_end(str))
00442 throw domain_error("End symbol has already been inserted");
00443
00444 if (root != NULL)
00445 throw domain_error("Huffman encoding tree has already been generated");
00446
00447 auto_ptr<BinNode<string> > bin_node_auto ( new BinNode<string> (str) );
00448
00449 Huffman_Node * huffman_node = static_cast<Huffman_Node*>
00450 (heap.insert(new Huffman_Node(bin_node_auto.get())));
00451
00452 symbol_map.insert(str, huffman_node);
00453
00454 bin_node_auto.release();
00455
00456 end_symbol = str;
00457 }
00471 size_t encode(char * input, BitArray & bit_stream)
00472 {
00473
00474 if (root == NULL)
00475 throw std::domain_error("Huffman tree has not been generated");
00476
00477 char * curr_stream = input;
00478 char curr_token[Max_Token_Size];
00479 curr_token[1] = '\0';
00480
00481 size_t bit_stream_len = 0;
00482
00483 while (*curr_stream != '\0')
00484 {
00485 curr_token[0] = *curr_stream++;
00486
00487 append_code(bit_stream, bit_stream_len, code_map[curr_token]);
00488 }
00489
00490 append_code(bit_stream, bit_stream_len, code_map[""]);
00491
00492 return bit_stream_len;
00493 }
00494
00508 size_t encode(ifstream & input, BitArray & bit_stream)
00509 {
00510 if (root == NULL)
00511 throw std::domain_error("Huffman tree has not been generated");
00512
00513 char curr_token[2];
00514 curr_token[0] = curr_token[1] = '\0';
00515 size_t bit_stream_len = 0;
00516
00517 while (not input.eof())
00518 {
00519 input.read(curr_token, 1);
00520
00521 append_code(bit_stream, bit_stream_len, code_map[curr_token]);
00522 }
00523
00524 append_code(bit_stream, bit_stream_len, code_map[""]);
00525
00526 return bit_stream_len;
00527 }
00528
00529 };
00530
00536 class Huffman_Decoder_Engine
00537 {
00538 BinNode<string> * root;
00539 string end_symbol;
00540 public:
00541
00550 Huffman_Decoder_Engine(BinNode<string> * p, const string & end)
00551 : root(p), end_symbol(end)
00552 {
00553
00554 }
00556 BinNode<string> * get_root()
00557 {
00558 if (root == NULL)
00559 throw std::domain_error("Huffman tree has not been generated");
00560
00561 return root;
00562 }
00575 void decode(BitArray & bit_stream,
00576 const size_t & bit_stream_len,
00577 ostream & output)
00578 {
00579 BinNode<string> * p = root;
00580
00581 for (int i = 0; i < bit_stream_len; ++i)
00582 {
00583 if (bit_stream.read_bit(i) == 0)
00584 p = LLINK(p);
00585 else
00586 p = RLINK(p);
00587
00588
00589 if (p == NULL)
00590 throw std::domain_error("Invalid bits sequence");
00591
00592 if (is_leaf(p))
00593 {
00594 const string & symbol = p->get_key();
00595
00596 if (symbol == end_symbol)
00597 break;
00598
00599 output << symbol;
00600
00601 p = root;
00602 }
00603 }
00604 }
00605 };
00606
00607 }
00608
00609 # endif // HUFFMAN_H
00610