55#include " utils.hpp"
66#include " Logger.hpp"
77
8+ // //////////////////////////////////////////////////////////////////////////////
9+ // / Private functions
10+ // //////////////////////////////////////////////////////////////////////////////
811
9- template <class T >
10- algo::Huffman<T>::Huffman(void ) {
12+ /* *
13+ * @brief A comparator to sort Codeword pairs by bit length.
14+ */
15+ struct CodewordComparator {
16+ bool operator ()(const std::pair<uint8_t , algo::Codeword>& first,
17+ const std::pair<uint8_t , algo::Codeword>& second)
18+ {
19+ return first.second .len > second.second .len ;
20+ }
21+ };
1122
23+ /* *
24+ * @brief Add the given settings to the output stream according to the amount of bits
25+ * specified in the Huffman class.
26+ * @param length
27+ * The length of the sequence that will follow this header.
28+ * If the length is 0, only one '0' bit will be written.
29+ * @param bit_length
30+ * The amount of bits needed for every data element in the sequence following this header.
31+ * Keys always use KEY_BITS as length, and values use bit_length, which is different for each group.
32+ * This is done to minimize the amoutn of bits needed to save the Huffman dictionary.
33+ * @param writer
34+ * The outputstream to write to.
35+ */
36+ template <class T >
37+ void algo::Huffman<T>::add_huffman_dict_header(uint32_t length, uint32_t bit_length, util::BitStreamWriter& writer) {
38+ if (length > 0 ) {
39+ writer.put (algo::Huffman<T>::DICT_HDR_HAS_ITEMS_BITS + algo::Huffman<T>::DICT_HDR_SEQ_LENGTH_BITS,
40+ 0x80 | (length & 0x7F )); // MSB is HAS_ITEMS setting + 7 bits length
41+ writer.put (algo::Huffman<T>::DICT_HDR_ITEM_BITS,
42+ bit_length & 0xF ); // 4 bits for bit length of every dict item
43+ } else {
44+ writer.put_bit (0 );
45+ }
1246}
1347
48+ /* *
49+ * @brief Read a dictionary header from the inputstream and set the given variables.
50+ *
51+ * @param reader
52+ * The inputstream to read from.
53+ * @param length
54+ * The length of the sequence that will follow this header (will be set).
55+ * @param bit_length
56+ * The amount of bits for every value element in the following sequence (will be set).
57+ *
58+ * @return Returns true if there is data after this header. (first bit was set)
59+ */
1460template <class T >
15- algo::Huffman<T>::~Huffman (void ) {
16- this ->deleteTree (this ->tree_root );
61+ bool algo::Huffman<T>::read_huffman_dict_header(util::BitStreamReader& reader, uint32_t & length, uint32_t & bit_length) {
62+ if (reader.get_bit ()) {
63+ length = reader.get (algo::Huffman<T>::DICT_HDR_SEQ_LENGTH_BITS);
64+ bit_length = reader.get (algo::Huffman<T>::DICT_HDR_ITEM_BITS);
65+ return true ;
66+ }
67+
68+ return false ;
1769}
1870
71+ /* *
72+ * @brief Deallocate every node in the given tree.
73+ * @param root
74+ * The node to start with and delete its children.
75+ */
1976template <class T >
2077void algo::Huffman<T>::deleteTree(algo::Node<> *root) {
2178 if (root == nullptr ) return ;
@@ -34,30 +91,26 @@ void algo::Huffman<T>::deleteTree(algo::Node<> *root) {
3491 * The current stream of bits for a path in the tree.
3592 */
3693template <class T >
37- size_t algo::Huffman<T>::buildDict(const algo::Node<> * const node, std::vector<bool > stream) {
94+ void algo::Huffman<T>::buildDict(const algo::Node<> * const node, std::vector<bool > stream) {
3895 if (node == nullptr ) {
39- return 0u ;
96+ return ;
4097 }
4198
4299 // Check if leaf
43100 if (node->left == nullptr && node->right == nullptr ) {
44- const uint32_t size = uint32_t (stream.size ());
45-
46101 this ->dict [node->data ] = Codeword {
47102 std::accumulate (stream.begin (), stream.end (), uint32_t (0u ),
48103 [=](uint32_t x, uint32_t y) { return (x << 1u ) | y; }),
49- size
104+ uint32_t (stream. size ())
50105 };
51-
52- return size;
53106 }
54107
55108 std::vector<bool > lstream (stream);
56109 lstream.push_back (false );
57110 stream.push_back (true );
58111
59- return std::max ( this ->buildDict (node->left , lstream),
60- this ->buildDict (node->right , stream) );
112+ this ->buildDict (node->left , lstream);
113+ this ->buildDict (node->right , stream);
61114}
62115
63116/* *
@@ -89,6 +142,18 @@ void algo::Huffman<T>::decode(const algo::Node<> * const node, util::BitStreamRe
89142 }
90143}
91144
145+ // //////////////////////////////////////////////////////////////////////////////
146+
147+ template <class T >
148+ algo::Huffman<T>::Huffman(void ) {
149+
150+ }
151+
152+ template <class T >
153+ algo::Huffman<T>::~Huffman (void ) {
154+ this ->deleteTree (this ->tree_root );
155+ }
156+
92157/* *
93158 * @brief Encode bits of length sizeof(T) with Huffman encoding and
94159 * write the Huffman dict and the encoded data to an outputstream.
@@ -98,7 +163,7 @@ void algo::Huffman<T>::decode(const algo::Node<> * const node, util::BitStreamRe
98163 * @return Returns a new bitstream with the encoded data.
99164 */
100165template <class T >
101- util::BitStreamWriter* algo::Huffman<T>::encode(util::BitStreamReader& reader) {
166+ util::BitStreamWriter* algo::Huffman<T>::encode(util::BitStreamReader& reader) {
102167 const size_t length = reader.get_size () * 8u ;
103168
104169 // Calculate frequencies
@@ -115,6 +180,8 @@ util::BitStreamWriter* algo::Huffman<T>::encode(util::BitStreamReader& reader) {
115180
116181 for (const auto & pair: freqs) {
117182 pq.push (util::allocVar<algo::Node<>>(pair.first , pair.second ));
183+
184+ util::Logger::WriteLn (std::string_format (" %02X: %d" , pair.first , pair.second ), false );
118185 }
119186
120187 while (pq.size () > 1 ) {
@@ -128,67 +195,113 @@ util::BitStreamWriter* algo::Huffman<T>::encode(util::BitStreamReader& reader) {
128195
129196 this ->tree_root = pq.top ();
130197
131- const size_t h_table_bits = this ->buildDict (this ->tree_root , std::vector<bool >());
132- const size_t h_dict_total_length = (algo::Huffman<>::KEY_BITS + h_table_bits)
133- * this ->dict .size () // Every {key: val} pair
134- + algo::Huffman<>::KEY_BITS // Length of table itself
135- + algo::Huffman<>::SIZE_BITS; // Bits per value
198+ this ->buildDict (this ->tree_root , std::vector<bool >());
136199
137- util::Logger::WriteLn ( std::string_format ( " [Huffman] {key:%d, val:%d} for %d entries + %d hdr bits (%.1f total bytes). " ,
138- algo::Huffman<>::KEY_BITS, h_table_bits, this -> dict . size (),
139- (algo::Huffman<>::KEY_BITS + algo::Huffman<>::SIZE_BITS),
140- float (h_dict_total_length) / 8 . 0f ));
200+ // Create new list with dict elements sorted by bit length for saving to stream
201+ // Sort the dictionary by value bit length
202+ std::vector<std::pair< uint8_t , algo::Codeword>> sorted_dict ( this -> dict . begin (), this -> dict . end ());
203+ std::sort (sorted_dict. begin (), sorted_dict. end (), CodewordComparator ( ));
141204
142- util::BitStreamWriter *writer = util::allocVar<util::BitStreamWriter>((h_dict_total_length + length) / 8 + 1 );
205+ // Determine frequencies of each bit length with {bit_length: freq}
206+ std::unordered_map<uint32_t , uint32_t > bit_freqs;
207+ for (const auto & w : sorted_dict) {
208+ bit_freqs[w.second .len ]++;
209+ }
143210
144- writer->put (algo::Huffman<>::KEY_BITS , uint32_t (this ->dict .size ())); // /< Put table size
145- writer->put (algo::Huffman<>::SIZE_BITS, uint32_t (h_table_bits)); // /< Put bit length of a table value
211+ // Calculate total needed length for dict
212+ size_t h_dict_total_length = (algo::Huffman<>::KEY_BITS * this ->dict .size ()) // Amount of bits needed for keys
213+ + ((algo::Huffman<>::DICT_HDR_HAS_ITEMS_BITS + algo::Huffman<>::DICT_HDR_ITEM_BITS + algo::Huffman<>::DICT_HDR_SEQ_LENGTH_BITS)
214+ * bit_freqs.size ()) // Amount of bits for each header
215+ + 1 ; // Stop bit
216+ for (const auto & f : bit_freqs) {
217+ h_dict_total_length += f.first * f.second ; // Amount of bits for each header group
218+ }
146219
147- for (const auto & pair : this ->dict ) {
148- writer->put (algo::Huffman<>::KEY_BITS, pair.first ); // Put Key
149- writer->put (h_table_bits, pair.second .word ); // Put Val
220+ util::Logger::WriteLn (std::string_format (" [Huffman] Dict{key:%d, val:*} for %d entries + hdr bits: %.1f total bytes." ,
221+ algo::Huffman<>::KEY_BITS, this ->dict .size (),
222+ float (h_dict_total_length) / 8 .0f ));
223+
224+ // *** Save the Huffman dictionary to a stream ***//
225+ util::BitStreamWriter *writer = util::allocVar<util::BitStreamWriter>((h_dict_total_length + length) / 8 + 1 );
226+ uint32_t seq_len = 0u , bit_len = 0u ;
227+
228+ // Add headers for each group of same length key:val pairs
229+ // and write them to the stream
230+ for (const auto & w : sorted_dict) {
231+ if (seq_len == 0 ) {
232+ // New group
233+ bit_len = w.second .len ;
234+ seq_len = bit_freqs[bit_len];
235+ add_huffman_dict_header (seq_len, bit_len, *writer);
236+ }
237+
238+ writer->put (algo::Huffman<>::KEY_BITS, w.first ); // Put Key
239+ writer->put (bit_len, w.second .word ); // Put Val
240+ seq_len--;
150241 }
151242
243+ add_huffman_dict_header (0 , 0 , *writer);
244+
152245
153246 /* ******************************************************************************/
154247
155248 /* ori*/
156249 reader.set_position (0 );
157- while (reader.get_position () != length) {
158- const T word = T (reader.get (algo::Huffman<>::KEY_BITS));
159- util::Logger::Write (std::string_format (" %X" , word), false );
160- } util::Logger::WriteLn (std::string_format (" (%d bytes)" , length/8 ), false );
250+ // while(reader.get_position() != length) {
251+ // const T word = T(reader.get(algo::Huffman<>::KEY_BITS));
252+ // util::Logger::Write(std::string_format("%X", word), false);
253+ // }
254+ util::Logger::WriteLn (std::string_format (" (%d bytes)" , length/8 ), false );
161255
162256 /* encoded*/
257+ // Encode
163258 reader.set_position (0 );
164259 while (reader.get_position () != length) {
165260 const T word = T (reader.get (algo::Huffman<>::KEY_BITS));
166- util::Logger::Write (std::string_format (" %X" , this ->dict [word]), false );
167-
168261 writer->put (this ->dict [word].len , this ->dict [word].word ); // TODO
169- } util::Logger::WriteLn (" " , false );
262+ }
263+
264+ /* encoded stream*/
265+ size_t len = writer->get_position () / 8 ;
266+ // for (size_t i = 0; i < len; i++) {
267+ // util::Logger::Write(std::string_format("%X", writer->get_buffer()[i]), false);
268+ // }
269+ util::Logger::WriteLn (std::string_format (" (%d bytes)" , len), false );
170270
171271 /* decoded*/
172272 util::BitStreamReader enc (writer->get_buffer (), (writer->get_position () / 8 ) + 1 );
173- size_t table_size = enc.get (algo::Huffman<>::KEY_BITS);
174- size_t entry_bits = enc.get (algo::Huffman<>::SIZE_BITS);
175- enc.set_position (enc.get_position () + (algo::Huffman<>::KEY_BITS + entry_bits) * table_size);
176273
177- util::BitStreamWriter out (length/8 );
274+ // readDictFromStream(enc);
275+ uint32_t dseq_len = 0u , dbit_len = 0u ;
276+ this ->dict .clear ();
277+ // this->deleteTree(this->tree_root);
178278
179- while (enc.get_position () <= enc.get_size () * 8u ) {
180- this ->decode (this ->tree_root , enc, out);
181- } util::Logger::WriteLn (" " , false );
279+ while (this ->read_huffman_dict_header (enc, dseq_len, dbit_len)) { // While header is followed by sequence
280+ while (dseq_len--) { // For each element, read {key, val}
281+ this ->dict [T (enc.get (algo::Huffman<>::KEY_BITS))] = Codeword { enc.get (dbit_len), dbit_len };
282+ // TODO Add element to tree
283+ }
284+ }
182285
183- out.set_position (0 );
184- for (size_t i = 0 ; i < out.get_size (); i++) {
185- util::Logger::Write (std::string_format (" %X" , out.get_buffer ()[i]), false );
186- } util::Logger::WriteLn (" " , false );
286+ // util::BitStreamWriter out(length/8);
287+
288+ // while (enc.get_position() <= enc.get_size() * 8u) {
289+ // this->decode(this->tree_root, enc, out);
290+ // } util::Logger::WriteLn("", false);
291+
292+ // out.set_position(0);
293+ // for (size_t i = 0; i < out.get_size(); i++) {
294+ // util::Logger::Write(std::string_format("%X", out.get_buffer()[i]), false);
295+ // } util::Logger::WriteLn("", false);
296+
297+ // util::Logger::WriteLn("", false);
298+ // this->printTree();
299+ // util::Logger::WriteLn("", false);
187300
188- util::Logger::WriteLn (" " , false );
189- this ->printTree ();
190301 util::Logger::WriteLn (" " , false );
191302
303+ this ->printDict ();
304+
192305 return writer;
193306}
194307
@@ -203,12 +316,16 @@ util::BitStreamWriter* algo::Huffman<T>::encode(util::BitStreamReader& reader) {
203316template <class T >
204317util::BitStreamWriter* algo::Huffman<T>::decode(util::BitStreamReader& reader) {
205318 const size_t table_size = reader.get (algo::Huffman<>::KEY_BITS); // /< Get table size
206- const size_t entry_bits = reader.get (algo::Huffman<>::SIZE_BITS); // /< Get entry bit length
207319 const size_t data_bits = reader.get_size () * 8u ; // /< Amount of data bits
208320
209- for (size_t i = 0 ; i < table_size; i++) {
210- this ->dict [T (reader.get (algo::Huffman<>::KEY_BITS))] = Codeword { reader.get (entry_bits), 0u };
211- }
321+
322+ // TODO if first bit is zero => no Huffman table => do nothing, just pass the stream back
323+ // use internal flag to enable Huffman, if disabled, write 1 zero to stream before data,
324+ // and later just call huffman.decode() (see TODO this TODO)
325+
326+ // for (size_t i = 0; i < table_size; i++) {
327+ // this->dict[T(reader.get(algo::Huffman<>::KEY_BITS))] = Codeword { reader.get(entry_bits), 0u };
328+ // }
212329
213330
214331 // TODO Create tree from dict
0 commit comments