3636my $help = 0;
3737my $decompress = 0;
3838my $export_files = 0;
39- # my $output_file = 0;
39+ my $log_file_handle = 0;
40+
4041
4142# Set up variables for what we mine
4243my $total_identified_blobs = 0;
7677 exit ();
7778}
7879
80+ #
7981# See if we have a directory to work on
82+ #
8083if ($input_directory ) {
8184
8285 # Make directory absolute
8790 die " Directory $input_directory does not exist\n " ;
8891 }
8992
90- # Tell the user what we're doing
91- print " Search Directory: $input_directory \n " ;
92-
9393 # Capture the lowest level folder name for the output directory
9494 my $tmp_folder_name = " folder_search" ;
9595 if ($input_directory =~ / ([^\\\/ ]+)$ / ) {
9696 $tmp_folder_name = $1 ;
9797 }
9898
9999 $output_directory = create_run_output_directory($output_directory , $tmp_folder_name , 1);
100- $results_file = create_results_file($output_directory );
100+ $log_file_handle = open_log_file($output_directory );
101+ $results_file = create_results_file($output_directory , $log_file_handle );
102+
103+ # Tell the user what we're doing
104+ print_log_line($log_file_handle , " Search Directory: $input_directory \n " );
101105
102106 # Pull out all potential SQLite files (based on file itself)
103107 my @files_to_mine ;
104108 find(
105109 sub {
106110 if (! -d $_ && file_is_sqlite($_ )) {
107111 my $tmp_filepath = $File::Find::name ;
108- print " Found SQLite: $tmp_filepath ! \n " if $verbose ;
112+ print_log_line_if( $log_file_handle , " Found SQLite: $tmp_filepath \n " , $verbose ) ;
109113 push (@files_to_mine , $tmp_filepath );
110114 }
111115 },
112116 $input_directory
113117 );
114- print " \n " if $verbose ;
118+ print_log_line_if( $log_file_handle , " \n " , $verbose ) ;
115119 foreach $tmp_file (sort (@files_to_mine )){
116120
117121 # Remember how many blobs we're currently at
118122 my $current_blob_count = $total_identified_blobs ;
119123
120124 # Run the parsing and store the export folder
121- my $tmp_run_folder = mine_file($output_directory , $tmp_file , $results_file , 1);
125+ my $tmp_run_folder = mine_file($output_directory , $tmp_file , $results_file , 1, $log_file_handle );
122126
123127 # Remove the copied files if we didn't actually do any work with them
124128 if ($total_identified_blobs == $current_blob_count ) {
125- File::Path-> remove_tree(File::Spec-> abs2rel($tmp_run_folder )) or die " Can't remove $tmp_run_folder - $! \n " ;
129+ File::Path-> remove_tree(File::Spec-> abs2rel($tmp_run_folder ));
126130 }
127131 }
128132}
129133
134+ #
130135# See if we have a file to work on
136+ #
131137if ($original_file ) {
132138 # Check to ensure the file actually exists
133139 if (! -f $original_file ) {
134140 die " File $original_file does not exist\n " ;
135141 }
136142
137143 $output_directory = create_run_output_directory($output_directory , $original_file , 1);
138-
139- $results_file = create_results_file($output_directory );
144+ $log_file_handle = open_log_file( $output_directory );
145+ $results_file = create_results_file($output_directory , $log_file_handle );
140146
141147 # Do work, son
142- mine_file($output_directory , $original_file , $results_file , 0);
148+ mine_file($output_directory , $original_file , $results_file , 0, $log_file_handle );
143149}
144150
145151# Finish up the timing
146152my $end_time = time ;
147153my $run_time = sprintf (" %.4f" , $end_time - $start_time );
148154
149155# Give the user some feedback
150- print_final_results();
156+ print_final_results($log_file_handle );
157+ close ($log_file_handle );
151158
152159exit ;
153160
154161# ###################
155162# Functions follow #
156163# ###################
157164
# Function to log a line of text
# Function takes a file handle for the log file, then the text to log
# Function dies when the handle is false, quoting the text it could not log
sub log_line {
    # Unpack as scalars; the single-element slices @_[0] / @_[1] trigger a
    # "better written as $_[0]" warning under `use warnings`
    my ($log_file_handle, $line) = @_;

    # A false handle means no log file was opened before this call
    if (!$log_file_handle) {
        die " Bad log file handle provided, exiting. Tried to print: $line \n ";
    }

    # Braces mark the first term unambiguously as the handle, not as data
    print {$log_file_handle} $line;
}
177+
# Function to echo a line of text to STDOUT and record it in the log file
# Function takes a file handle for the log file, then the text to print and log
sub print_log_line {
    my ($log_file_handle, $line) = @_;

    # STDOUT comes first, so the text is already shown if log_line dies on a
    # false handle
    print STDOUT $line;
    log_line($log_file_handle, $line);
}
187+
# Function to print and log a line of text only when a boolean is true
# Function takes a file handle for the log file, the text to print and log,
# and the boolean that gates the output (in that order)
sub print_log_line_if {
    my ($log_file_handle, $line, $condition) = @_;

    # Nothing is printed or logged when the condition is false
    print_log_line($log_file_handle, $line) if $condition;
}
199+
# Function to print and log a line of text before dying
# Function takes a file handle for the log file, then the text to print and log
# Function never returns; it dies with the same text it printed and logged
sub print_log_die {
    my ($log_file_handle, $line) = @_;

    # Record the failure on STDOUT and in the log before giving up
    print_log_line($log_file_handle, $line);

    # Hand the text to die so STDERR and $@ carry the real reason; a bare
    # `die` would only report "Died at ..."
    die $line;
}
209+
158210# Function that identifies a SQLite file
159211# Function expects a path
160212# Function returns a boolean
@@ -180,10 +232,11 @@ sub file_is_sqlite {
180232# Function requires a path to the run folder and that's it
181233sub create_results_file {
182234 my $run_folder = @_ [0];
235+ my $log_file_handle = @_ [1];
183236
184237 # Create the output file and spit the head to it
185238 my $output_file = File::Spec-> catfile($run_folder , " results.csv" );
186- open (RESULT_OUTPUT, " >$output_file " ) or die " Can't open $output_file to write results\n " ;
239+ open (RESULT_OUTPUT, " >$output_file " ) or print_log_die( $log_file_handle , " Can't open $output_file to write results\n " ) ;
187240 print RESULT_OUTPUT " \" Directory\" ,\" Database\" ,\" Table\" ,\" Column\" ,\" Primary Key Column\" ,\" Index\" ,\" File Type\" " ;
188241 if ($export_files ) {
189242 print RESULT_OUTPUT " ,\" Export Filename\" " ;
@@ -195,6 +248,17 @@ sub create_results_file {
195248 return $output_file ;
196249}
197250
# Function to create the log file
# Function requires a path to the run folder
# Function returns a file handle opened for writing to the log file
# Function dies if the log file cannot be opened
sub open_log_file {
    my ($run_folder) = @_;
    my $log_file = File::Spec->catfile($run_folder, " log.txt");

    # Three-argument open stops characters in the path from being parsed as
    # part of the open mode. The lexical handle is a real value that can be
    # returned and passed to other subs, unlike a bareword handle name.
    open(my $log_output, '>', $log_file) or die " Can't open $log_file - $! \n ";

    # First entry in the log records where the log itself lives
    log_line($log_output, " Log file opened - " . File::Spec->abs2rel($log_file) . " \n ");
    return $log_output;
}
261+
198262# Function to handle mining one file
199263# Function expects the output directory and original filename
200264# Function returns the run folder
@@ -203,6 +267,7 @@ sub mine_file {
203267 my $original_file = @_ [1];
204268 my $output_file = @_ [2];
205269 my $is_directory_run = @_ [3];
270+ my $log_file_handle = @_ [4];
206271
207272 $total_files += 1;
208273
@@ -214,7 +279,7 @@ sub mine_file {
214279 }
215280
216281 if (!$output_file ) {
217- $output_file = create_results_file($run_folder );
282+ $output_file = create_results_file($run_folder , $log_file_handle );
218283 }
219284
220285 # Make sure we don't mess up our original
@@ -228,29 +293,29 @@ sub mine_file {
228293 }
229294 my $output_db_file = File::Spec-> catfile($run_folder ,$output_db_file );
230295 copy($original_file , $output_db_file ) or die " Can't copy $original_file to $output_db_file - $! \n " ;
231- print " SQLite file: " .File::Spec-> abs2rel($output_db_file )." \n " if $verbose ;
296+ print_log_line_if( $log_file_handle , " SQLite file: " .File::Spec-> abs2rel($output_db_file )." \n " , $verbose ) ;
232297
233298 # Make a folder to export files to, if desired
234299 if ($export_files ) {
235300 $export_directory = File::Spec-> catdir($run_folder , " exports" );
236301 mkdir $export_directory ;
237- print " Export folder: " .File::Spec-> abs2rel($export_directory )." \n " if $verbose ;
302+ print_log_line_if( $log_file_handle , " Export folder: " .File::Spec-> abs2rel($export_directory )." \n " , $verbose ) ;
238303 }
239304
240305 # Set up database connection
241306 my $dsn = " DBI:SQLite:dbname=$output_db_file " ;
242- my $dbh = DBI-> connect ($dsn ) or die " Cannot open $output_db_file \n " ;
307+ my $dbh = DBI-> connect ($dsn ) or print_log_die( $log_file_handle , " Cannot open $output_db_file \n " ) ;
243308
244- print " Mining: " .File::Spec-> abs2rel($output_db_file )." \n " ;
309+ print_log_line( $log_file_handle , " Mining: " .File::Spec-> abs2rel($output_db_file )." \n " ) ;
245310
246- print " \n " if $verbose ;
311+ print_log_line_if( $log_file_handle , " \n " , $verbose ) ;
247312
248313 # Fetch the table information
249314 my %table_information = get_table_information($dbh );
250315
251316 # Identify possibly interesting blob columns
252317 foreach $table (sort (keys (%table_information ))) {
253- print " Investigating $table \n " if $verbose ;
318+ print_log_line_if( $log_file_handle , " Investigating $table \n " , $verbose ) ;
254319
255320 # Break this table out into schema and table name
256321 (my $schema , my $table_name ) = normalize_table_name($table );
@@ -262,7 +327,7 @@ sub mine_file {
262327 my %tmp_table = %{$table_information {$table }};
263328 foreach $column (keys (%tmp_table )) {
264329 if ($table_information {$table }{$column } eq " BLOB" ) {
265- check_column_for_fun($dbh , $output_db_file , $table , $column , @primary_key_columns );
330+ check_column_for_fun($dbh , $output_db_file , $table , $column , $log_file_handle , @primary_key_columns );
266331 }
267332 }
268333 }
@@ -307,11 +372,12 @@ sub count_mined_blob {
307372# Function needs to be provided a database handle, table name, column name, and array of primary keys
308373# Function will return nothing (yet)
309374sub check_column_for_fun {
310- my $local_dbh = @_ [0];
311- my $file_name = @_ [1];
312- my $table_name = @_ [2];
313- my $column_name = @_ [3];
314- my @primary_keys = @_ [4];
375+ my $local_dbh = @_ [0];
376+ my $file_name = @_ [1];
377+ my $table_name = @_ [2];
378+ my $column_name = @_ [3];
379+ my $log_file_handle = @_ [4];
380+ my @primary_keys = @_ [5];
315381
316382 # Get the real table name
317383 (my $tmp_schema , my $tmp_table_name ) = normalize_table_name($table_name );
@@ -354,14 +420,14 @@ sub check_column_for_fun {
354420 }
355421
356422 # Display output, if relevant
357- print " \t $file_type : Possibly found in $column_name " if $verbose ;
423+ print_log_line_if( $log_file_handle , " \t $file_type : Possibly found in $column_name " , $verbose ) ;
358424 $total_identified_blobs += 1;
359425 count_mined_blob(File::Spec-> abs2rel($file_name ), $tmp_table_name , $column_name , $file_type );
360426
361427 if ($primary_key_column ) {
362- print " when $primary_key_column =$tmp_primary_key \n " if $verbose ;
428+ print_log_line_if( $log_file_handle , " when $primary_key_column =$tmp_primary_key \n " , $verbose ) ;
363429 } else {
364- print " (no primary key)\n " if $verbose ;
430+ print_log_line_if( $log_file_handle , " (no primary key)\n " , $verbose ) ;
365431 }
366432
367433 # Print out to the target CSV file
@@ -394,7 +460,7 @@ sub check_column_for_fun {
394460
395461 # Export the file
396462 (my $tmp_export_volume_for_output , my $tmp_export_directory_for_output , my $tmp_export_filename_for_output ) = File::Spec-> splitpath($tmp_export_file_path );
397- print " \t Exporting file as $tmp_export_filename_for_output \n " if $very_verbose ;
463+ print_log_line_if( $log_file_handle , " \t Exporting file as $tmp_export_filename_for_output \n " , $very_verbose ) ;
398464 open (OUTPUT, " >$tmp_export_file_path " );
399465 binmode (OUTPUT);
400466 print OUTPUT $tmp_data_blob ;
@@ -414,16 +480,16 @@ sub check_column_for_fun {
414480 my $tmp_update_query = " UPDATE $table_name SET $column_name =? WHERE $primary_key_column =?" ;
415481 my $tmp_update_query_handler = $local_dbh -> prepare($tmp_update_query );
416482 $tmp_update_query_handler -> execute($tmp_new_blob , $tmp_primary_key );
417- print " \t Updated $column_name in $table_name with decompressed data when $primary_key_column =$tmp_primary_key \n " if $very_verbose ;
483+ print_log_line_if( $log_file_handle , " \t Updated $column_name in $table_name with decompressed data when $primary_key_column =$tmp_primary_key \n " , $very_verbose ) ;
418484 $decompressed_anything = 1;
419485 } elsif (length ($tmp_new_blob ) > 0 and !$tmp_primary_key ) {
420486 my $tmp_update_query = " UPDATE $table_name SET $column_name =?" ;
421487 my $tmp_update_query_handler = $local_dbh -> prepare($tmp_update_query );
422488 $tmp_update_query_handler -> execute($tmp_new_blob );
423- print " \t Updated $column_name in $table_name with decompressed data (no primary key)\n " if $very_verbose ;
489+ print_log_line_if( $log_file_handle , " \t Updated $column_name in $table_name with decompressed data (no primary key)\n " , $very_verbose ) ;
424490 $decompressed_anything = 1;
425491 } else {
426- print " \t Not updating $column_name in $table_name with decompressed data due to likely bad decompression\n " if $very_verbose ;
492+ print_log_line_if( $log_file_handle , " \t Not updating $column_name in $table_name with decompressed data due to likely bad decompression\n " , $very_verbose ) ;
427493 }
428494 print RESULT_OUTPUT " ,\" Decompressed\" " ;
429495 }
@@ -528,6 +594,7 @@ sub create_run_output_directory {
528594
529595# Function to print our run results
530596sub print_final_results {
597+ my $log_file_handle = @_ [0];
531598
532599 # Tell the user what we did
533600 my $identify_stats = " $total_identified_blobs potential blob file" ;
@@ -543,33 +610,33 @@ sub print_final_results {
543610 }
544611
545612 my $stat_line = " $total_files SQLite file" ;
546- if ($total_files >= 1) {
613+ if ($total_files > 1) {
547614 $stat_line .= " s" ;
548615 }
549616
550- print " \n #######################################################\n " ;
551- print " $stat_line mined, $identify_stats in $run_time seconds.\n " ;
552- print " Result file: " .File::Spec-> abs2rel($results_file )." \n " ;
617+ print_log_line( $log_file_handle , " \n #######################################################\n " ) ;
618+ print_log_line( $log_file_handle , " $stat_line mined, $identify_stats in $run_time seconds.\n " ) ;
619+ print_log_line( $log_file_handle , " Result file: " .File::Spec-> abs2rel($results_file )." \n " ) ;
553620
554621 # Loop over all files
555622 foreach $file_name (sort (keys (%mined_blobs ))) {
556- print " \n $file_name \n " ;
623+ print_log_line( $log_file_handle , " \n $file_name \n " ) ;
557624 foreach $table_name (sort (keys (%{$mined_blobs {$file_name }}))) {
558- print " \t $table_name table:\n " ;
625+ print_log_line( $log_file_handle , " \t $table_name table:\n " ) ;
559626 foreach $column_name (keys (%{$mined_blobs {$file_name }{$table_name }})) {
560- print " \t\t $column_name column: " ;
627+ print_log_line( $log_file_handle , " \t\t $column_name column: " ) ;
561628 my $file_types = 0;
562629 foreach $file_type (keys (%{$mined_blobs {$file_name }{$table_name }{$column_name }})) {
563630 $count = $mined_blobs {$file_name }{$table_name }{$column_name }{$file_type };
564- print " , " if $file_types ;
565- print " $count $file_type " ;
631+ print_log_line_if( $log_file_handle , " , " , $file_types ) ;
632+ print_log_line( $log_file_handle , " $count $file_type " ) ;
566633 $file_types += 1;
567634 }
568- print " \n " ;
635+ print_log_line( $log_file_handle , " \n " ) ;
569636 }
570637 }
571638 }
572- print " #######################################################\n " ;
639+ print_log_line( $log_file_handle , " #######################################################\n " ) ;
573640}
574641
575642# Function to print run header
0 commit comments