|
26 | 26 | use POSIX qw(strftime); |
27 | 27 | use Time::HiRes qw(time); |
28 | 28 |
|
29 | | -my $version = "1.2.2"; |
| 29 | +my $version = "1.2.3"; |
30 | 30 |
|
31 | 31 | # Set up initial variables |
32 | 32 | my $start_time = time; |
|
60 | 60 | 'verbose' => \$verbose, |
61 | 61 | 'very-verbose' => \$very_verbose, |
62 | 62 | 'output=s' => \$output_directory, |
| 63 | + 'protobufs' => \$protobufs, |
63 | 64 | 'help' => \$help); |
64 | 65 |
|
65 | 66 | # Set verbose if very-verbose was chosen |
@@ -564,6 +565,100 @@ sub check_column_for_fun { |
564 | 565 | } |
565 | 566 |
|
566 | 567 | } |
| 568 | + |
# Optional (slow) pass, enabled by the 'protobufs' option: hand every
# non-NULL value of this column to `protoc --decode_raw` and report anything
# it manages to decode as a *possible* protobuf blob. Matches are logged,
# counted via count_mined_blob(), written as a row to RESULT_OUTPUT and,
# when $export_files is set, saved as a file under $export_directory.
if ($protobufs) {

    # Loaded on demand so the module is only needed when this pass runs.
    require File::Temp;

    # Select every non-NULL value of the column.
    # NOTE(review): assumes $base_query ends with whitespace and that
    # $column_name is safe to interpolate into SQL -- confirm against the
    # code that builds them.
    my $tmp_query = $base_query . "WHERE $column_name NOT NULL";

    # Build and execute the query; skip the pass (instead of dying on an
    # undefined statement handle) if either step fails.
    my $tmp_query_handler = $local_dbh->prepare($tmp_query);
    if ($tmp_query_handler && $tmp_query_handler->execute()) {

        # Loop over all rows returned
        ROW:
        while (my @tmp_row = $tmp_query_handler->fetchrow_array()) {
            my $file_type = "protobuf";

            # With a primary key column the row is (key, blob); otherwise
            # it is just (blob).
            my ($tmp_primary_key, $tmp_data_blob) = $primary_key_column
                ? @tmp_row[0, 1]
                : (undef, $tmp_row[0]);
            next ROW unless defined $tmp_data_blob;

            # Write the blob to a uniquely named temporary file so that we
            # never clobber a real file in the current directory. The file
            # is removed automatically when $tmp_file goes out of scope.
            my $tmp_file = eval { File::Temp->new(TEMPLATE => 'protobuf_check_XXXXXX', TMPDIR => 1) };
            if (!$tmp_file) {
                warn "Could not create temporary file for protobuf check: $@";
                last ROW;
            }
            my $tmp_path = $tmp_file->filename;

            # The blob is raw binary data: no newline translation.
            binmode($tmp_file);
            my $tmp_written = print {$tmp_file} $tmp_data_blob;
            # Closing flushes the data so protoc sees the complete blob.
            if (!close($tmp_file) || !$tmp_written) {
                warn "Could not write temporary file $tmp_path: $!\n";
                next ROW;
            }

            # Check if protoc will read this. The path is quoted because
            # the temporary directory may contain spaces.
            my $result = `protoc --decode_raw < "$tmp_path"`;

            # If the output starts with a number (a field tag), protoc was
            # able to parse it. Backticks yield undef when the command
            # could not be started.
            next ROW unless defined $result && $result =~ /^\d+/;

            # Never interpolate undef into the log or the CSV row.
            my $tmp_key_column_for_output = defined $primary_key_column ? $primary_key_column : "";
            my $tmp_key_for_output        = defined $tmp_primary_key    ? $tmp_primary_key    : "";

            print_log_line_if($log_file_handle, "\tProtobuf: Possibly found in $column_name ", $verbose);
            $total_identified_blobs += 1;
            count_mined_blob(File::Spec->abs2rel($file_name), $tmp_table_name, $column_name, $file_type);

            if ($primary_key_column) {
                print_log_line_if($log_file_handle, "when $primary_key_column=$tmp_key_for_output\n", $verbose);
            } else {
                print_log_line_if($log_file_handle, "(no primary key)\n", $verbose);
            }

            # Print out to the target CSV file
            my ($tmp_volume_for_output, $tmp_directory_for_output, $tmp_filename_for_output) = File::Spec->splitpath($file_name);
            print RESULT_OUTPUT "\"".File::Spec->abs2rel($tmp_directory_for_output)."\",".
                "\"$tmp_filename_for_output\",".
                "\"$tmp_table_name\",".
                "\"$column_name\",".
                "\"$tmp_key_column_for_output\",".
                "\"$tmp_key_for_output\",".
                "\"$file_type\"";

            # Save out the blob if we're exporting files
            if ($export_files) {

                # Build the export filename (TABLE-COLUMN[-PRIMARYKEYCOLUMN-PRIMARYKEY].blob.protobuf).
                # Test definedness rather than truth so a key of 0 is kept.
                my $tmp_export_file_name = $tmp_table_name."-".$column_name;
                if (defined $tmp_primary_key && length $tmp_primary_key) {
                    $tmp_export_file_name .= "-".$primary_key_column."-".$tmp_primary_key;
                }
                $tmp_export_file_name .= ".blob.protobuf";
                my $tmp_export_file_path = File::Spec->catfile($export_directory, $tmp_export_file_name);
                my $tmp_export_file_counter = 1;

                # Keep looping until we're sure we have a unique file path
                while (-e $tmp_export_file_path) {
                    $tmp_export_file_counter += 1;
                    $tmp_export_file_path = File::Spec->catfile($export_directory, $tmp_export_file_name."_".$tmp_export_file_counter);
                }

                # Export the file
                my ($tmp_export_volume_for_output, $tmp_export_directory_for_output, $tmp_export_filename_for_output) = File::Spec->splitpath($tmp_export_file_path);
                print_log_line_if($log_file_handle, "\tExporting file as $tmp_export_filename_for_output\n", $very_verbose);

                # Save off the file. Three-argument open keeps characters in
                # the (database-derived) name from being read as a mode.
                my $tmp_recorded_export_name = "";
                if (open(my $export_output, '>', $tmp_export_file_path)) {
                    binmode($export_output);
                    my $tmp_export_written = print {$export_output} $tmp_data_blob;
                    if (close($export_output) && $tmp_export_written) {
                        $tmp_recorded_export_name = $tmp_export_filename_for_output;
                    } else {
                        warn "Could not write $tmp_export_file_path: $!\n";
                    }
                } else {
                    warn "Could not open $tmp_export_file_path for writing: $!\n";
                }

                # Record where we stored this; the field is left empty when
                # the export failed so the row keeps its column count.
                print RESULT_OUTPUT ",\"$tmp_recorded_export_name\"";
            }

            # Close the line in the output file
            print RESULT_OUTPUT "\n";
        }
    } else {
        warn "Protobuf check skipped for $column_name: could not run query: "
            . (defined $local_dbh->errstr ? $local_dbh->errstr : "unknown error") . "\n";
    }
}
567 | 662 | } |
568 | 663 |
|
569 | 664 | # Function normalizes a table name |
|
0 commit comments