77import java .io .FileInputStream ;
88import java .io .FileOutputStream ;
99import java .io .IOException ;
10- import java .io .PrintStream ;
1110import java .util .PriorityQueue ;
1211
12+ import prog .lzw .LzwCompressor ;
13+
1314// NOTE: byte frequencies are stored as int, so a count wraps around once a single byte value occurs more than 2^31 - 1 times
1415public class HuffmanCompressor {
15-
16- static PriorityQueue <HuffmanNode > priorityQueue = new PriorityQueue <HuffmanNode >();
17- static int [] frequency = new int [300 ];
18- static String [] huffmanCodes = new String [300 ];
1916 static int extraBits ;
2017 static byte currentByte ;
2118 static int uniqueCharCount ; // number of different characters
2219
23- // for keeping frequncies of all the bytes
24-
25- // main tree class
26-
27- static HuffmanNode root ;
28-
29- /*******************************************************************************
30- * calculating frequence of file filename
31- ******************************************************************************/
32- public static void calculateFrequencyOfBytesInFile (String filename ) {
33- File file = new File (filename );;
34- Byte currentByte ;
35- try {
36- FileInputStream file_input = new FileInputStream (file );
37- DataInputStream data_in = new DataInputStream (file_input );
38- while (true ) {
39- try {
40-
41- currentByte = data_in .readByte ();
42- frequency [HuffmanUtils .to (currentByte )]++;
43- } catch (EOFException eof ) {
44- System .out .println ("End of File" );
45- break ;
46- }
47- }
48- file_input .close ();
49- data_in .close ();
50- } catch (IOException e ) {
51- System .out .println ("IO Exception =: " + e );
52- }
53- }
54-
55- /************************************** ============ ************************/
56-
57- /***********************************************************************************
58- * byte to binary conversion
59- ***********************************************************************************/
60- public static int to (Byte b ) {
61- int ret = b ;
62- if (ret < 0 ) {
63- ret = ~b ;
64- ret = ret + 1 ;
65- ret = ret ^ 255 ;
66- ret += 1 ;
67- }
68- return ret ;
69- }
70-
71- /***********************************************************************************/
72-
7320 /**********************************************************************************
7421 * freing the memory
7522 *********************************************************************************/
76- public static void initHuffmanCompressor () {
23+ public static void initHuffmanCompressor (HuffmanNode root ) {
7724 int i ;
7825 uniqueCharCount = 0 ;
7926 if (root != null )
8027 HuffmanUtils .fredfs (root , HuffmanUtils .HUFFMAN_TREE_ACCESSOR );
81- for (i = 0 ; i < 300 ; i ++)
82- frequency [i ] = 0 ;
83- for (i = 0 ; i < 300 ; i ++)
84- huffmanCodes [i ] = "" ;
85- priorityQueue .clear ();
8628 }
8729
8830 /**********************************************************************************/
@@ -106,26 +48,26 @@ public static void fredfs(HuffmanNode node) {
10648 /**********************************************************************************
10749 * dfs to make the codes
10850 *********************************************************************************/
109- public static void generateHuffmanCodes (HuffmanNode node , String code ) {
51+ public static void generateHuffmanCodes (HuffmanNode node , String code , String [] huffmanCodes ) {
11052 node .code = code ;
11153 if ((node .leftChild == null ) && (node .rightChild == null )) {
11254 huffmanCodes [node .byteValue ] = code ;
11355 return ;
11456 }
11557 if (node .leftChild != null )
116- generateHuffmanCodes (node .leftChild , code + "0" );
58+ generateHuffmanCodes (node .leftChild , code + "0" , huffmanCodes );
11759 if (node .rightChild != null )
118- generateHuffmanCodes (node .rightChild , code + "1" );
60+ generateHuffmanCodes (node .rightChild , code + "1" , huffmanCodes );
11961 }
12062
12163 /**********************************************************************************/
12264
12365 /*******************************************************************************
12467 * Puts one node per occurring byte into a priority queue, then merges the nodes into the Huffman tree
12567 *******************************************************************************/
126- public static void buildHuffmanTree () {
68+ public static HuffmanNode buildHuffmanTree (int [] frequency , String [] huffmanCodes ) {
12769 int i ;
128- priorityQueue . clear ();
70+ PriorityQueue < HuffmanNode > priorityQueue = new PriorityQueue < HuffmanNode > ();
12971
13072 for (i = 0 ; i < 300 ; i ++) {
13173 if (frequency [i ] != 0 ) {
@@ -142,14 +84,14 @@ public static void buildHuffmanTree() {
14284 HuffmanNode Temp1 , Temp2 ;
14385
14486 if (uniqueCharCount == 0 ) {
145- return ;
87+ return null ;
14688 } else if (uniqueCharCount == 1 ) {
14789 for (i = 0 ; i < 300 ; i ++)
14890 if (frequency [i ] != 0 ) {
14991 huffmanCodes [i ] = "0" ;
15092 break ;
15193 }
152- return ;
94+ return null ;
15395 }
15496
15597 // will there b a problem if the file is empty
@@ -163,155 +105,87 @@ public static void buildHuffmanTree() {
163105 Temp .frequency = Temp1 .frequency + Temp2 .frequency ;
164106 priorityQueue .add (Temp );
165107 }
166- root = priorityQueue .poll ();
167- }
168-
169- /*******************************************************************************/
170-
171- /*******************************************************************************
172- * encrypting
173- *******************************************************************************/
174- public static void encrypt (String filename ) {
175- File file = null ;
176-
177- file = new File (filename );
178- try {
179- FileInputStream file_input = new FileInputStream (file );
180- DataInputStream data_in = new DataInputStream (file_input );
181- while (true ) {
182- try {
183-
184- currentByte = data_in .readByte ();
185- frequency [currentByte ]++;
186- } catch (EOFException eof ) {
187- System .out .println ("End of File" );
188- break ;
189- }
190- }
191- file_input .close ();
192- data_in .close ();
193-
194- } catch (IOException e ) {
195- System .out .println ("IO Exception =: " + e );
196- }
197- file = null ;
108+ HuffmanNode root = priorityQueue .poll ();
109+ return root ;
198110 }
199111
200112 /*******************************************************************************/
201113
202- /*******************************************************************************
203- * fake zip creates a file "fakezip.txt" where puts the final binary codes
204- * of the real zipped file
205- *******************************************************************************/
206- public static void fakezip (String filename ) {
207114
115+ public static void zip (String filename , String filename1 , int [] frequency , String [] huffmanCodes ) {
208116 File filei , fileo ;
209117 int i ;
210-
211- filei = new File (filename );
212- fileo = new File ("fakezipped.txt" );
213- try {
214- FileInputStream file_input = new FileInputStream (filei );
215- DataInputStream data_in = new DataInputStream (file_input );
216- PrintStream ps = new PrintStream (fileo );
217-
218- while (true ) {
219- try {
220- currentByte = data_in .readByte ();
221- ps .print (huffmanCodes [to (currentByte )]);
222- } catch (EOFException eof ) {
223- System .out .println ("End of File" );
224- break ;
225- }
226- }
227-
228- file_input .close ();
229- data_in .close ();
230- ps .close ();
231-
232- } catch (IOException e ) {
233- System .out .println ("IO Exception =: " + e );
234- }
235- filei = null ;
236- fileo = null ;
237-
238- }
239-
240- /*******************************************************************************/
241-
242- /*******************************************************************************
243- * real zip according to codes of fakezip.txt (filename)
244- *******************************************************************************/
245- public static void realzip (String filename , String filename1 ) {
246- File filei , fileo ;
247- int i , j = 10 ;
248118 Byte currentBytet ;
249119
250120 filei = new File (filename );
251121 fileo = new File (filename1 );
252122
123+ DataInputStream dataIn ;
124+ DataOutputStream dataOut ;
125+
253126 try {
127+
254128 FileInputStream file_input = new FileInputStream (filei );
255- DataInputStream data_in = new DataInputStream (file_input );
129+ dataIn = new DataInputStream (file_input );
256130 FileOutputStream file_output = new FileOutputStream (fileo );
257- DataOutputStream data_out = new DataOutputStream (file_output );
131+ dataOut = new DataOutputStream (file_output );
132+
133+ // Step1: Write the table size
134+ dataOut .writeInt (uniqueCharCount );
258135
259- data_out . writeInt ( uniqueCharCount );
136+ // Step2: Write the table
260137 for (i = 0 ; i < 256 ; i ++) {
261138 if (frequency [i ] != 0 ) {
262139 currentBytet = (byte ) i ;
263- data_out .write (currentBytet );
264- data_out .writeInt (frequency [i ]);
140+ dataOut .write (currentBytet );
141+ dataOut .writeInt (frequency [i ]);
265142 }
266143 }
267- long textraBits ;
268- textraBits = filei .length () % 8 ;
269- textraBits = (8 - textraBits ) % 8 ;
270- extraBits = (int ) textraBits ;
271- data_out .writeInt (extraBits );
144+
145+ // Step3: Because the table might have some padding to make it a multiple of 8,
146+ //we need to calculate the total number of binary digits mod 8
147+ int totalBinaryDigitsMod8 = 0 ;
148+ for (i = 0 ; i < 256 ; i ++) {
149+ if (huffmanCodes [i ] != null ) {
150+ totalBinaryDigitsMod8 += huffmanCodes [i ].length () * frequency [i ];
151+ totalBinaryDigitsMod8 %= 8 ;
152+ }
153+ }
154+ int extraBits = (8 - totalBinaryDigitsMod8 )%8 ;
155+ dataOut .writeInt (extraBits );
156+
157+
158+ String bitBuffer = "" ;
272159 while (true ) {
273160 try {
274- currentByte = 0 ;
275- byte ch ;
276- for (extraBits = 0 ; extraBits < 8 ; extraBits ++) {
277- ch = data_in .readByte ();
278- currentByte *= 2 ;
279- if (ch == '1' )
280- currentByte ++;
161+ byte currentByte = dataIn .readByte ();
162+ String huffmanCodeOfCurrentByte = huffmanCodes [HuffmanUtils .to (currentByte )];
163+ bitBuffer += huffmanCodeOfCurrentByte ;
164+ while (bitBuffer .length () >= 8 ) {
165+ dataOut .write (LzwCompressor .stringToByte (bitBuffer .substring (0 , 8 )));
166+ bitBuffer = bitBuffer .substring (8 , bitBuffer .length ());
281167 }
282- data_out .write (currentByte );
283-
284168 } catch (EOFException eof ) {
285169 int x ;
286- if (extraBits != 0 ) {
287- for (x = extraBits ; x < 8 ; x ++) {
288- currentByte *= 2 ;
289- }
290- data_out .write (currentByte );
170+ if (bitBuffer .length () != 0 ) {
171+ dataOut .write (LzwCompressor .stringToByte (bitBuffer ));
291172 }
292-
293- extraBits = (int ) textraBits ;
294- System .out .println ("extrabits: " + extraBits );
295- System .out .println ("End of File" );
296173 break ;
297174 }
298175 }
299- data_in .close ();
300- data_out .close ();
176+ dataIn .close ();
177+ dataOut .close ();
301178 file_input .close ();
302179 file_output .close ();
303180 System .out .println ("output file's size: " + fileo .length ());
304181
305182 } catch (IOException e ) {
306183 System .out .println ("IO exception = " + e );
307184 }
308- filei .delete ();
309185 filei = null ;
310186 fileo = null ;
311187 }
312188
313- /*******************************************************************************/
314-
315189 /*
316190 * public static void main (String[] args) { initHzipping();
317191 * CalFreq("in.txt"); // calculate the frequency of each digit MakeNode();
@@ -325,16 +199,13 @@ public static void realzip(String filename, String filename1) {
325199 */
326200
327201 public static void beginHuffmanCompression (String arg1 ) {
328- initHuffmanCompressor ();
329- calculateFrequencyOfBytesInFile (arg1 ); // calculate the frequency of each digit
330- buildHuffmanTree (); // build huffman tree from frequencies
202+ initHuffmanCompressor (null );
203+ int [] frequency = HuffmanUtils .calculateFrequencyOfBytesInFile (arg1 ); // calculate the frequency of each digit
204+ String [] huffmanCodes = new String [300 ];
205+ HuffmanNode huffmanTree = buildHuffmanTree (frequency , huffmanCodes ); // build huffman tree from frequencies
331206 if (uniqueCharCount > 1 )
332- generateHuffmanCodes (root , "" ); // dfs to make the codes
333- fakezip (arg1 ); // fake zip file which will have the binary of the input
334- // to fakezipped.txt file
335- realzip ("fakezipped.txt" , arg1 + ".huffz" ); // making the real zip
336- // according the fakezip.txt
337- // file
338- initHuffmanCompressor ();
207+ generateHuffmanCodes (huffmanTree , "" , huffmanCodes ); // dfs to make the codes
208+ zip (arg1 , arg1 + ".huffz" , frequency , huffmanCodes ); // compress the file directly
209+ initHuffmanCompressor (huffmanTree );
339210 }
340211}
0 commit comments