Skip to content

Commit a459ef0

Browse files
Added quick hack to the protobuf branch to identify protobufs in various columns.
Signed-off-by: Three Planets Software <[email protected]>
1 parent 4220dae commit a459ef0

File tree

3 files changed

+107
-1
lines changed

3 files changed

+107
-1
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,9 @@ This script requires the following Perl packages:
4545
5. IO::Uncompress
4646
6. POSIX
4747
7. Time::HiRes
48+
49+
50+
## Protobufs
51+
If you want to use this script to find protobufs, check out the `protobuf` branch. This code is very experimental and is not optimized for large databases, because it simply brute-forces the answer by testing every non-NULL value in each column. It requires the `protoc` [package](https://github.com/protocolbuffers/protobuf) to be installed and uses Unix-specific commands, so it may not work on Windows.
52+
53+
To turn this functionality on, use the `--protobufs` switch on the command line.

changelog.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
Version 1.2.3 (September 17 2020)
2+
------------
3+
New Features
4+
- Hacked in protobuf identification on a separate protobuf branch.
5+
16
Version 1.2.2 (October 28 2017)
27
-------------
38
New Features

sqlite_miner.pl

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
use POSIX qw(strftime);
2727
use Time::HiRes qw(time);
2828

29-
my $version = "1.2.2";
29+
my $version = "1.2.3";
3030

3131
# Set up initial variables
3232
my $start_time = time;
@@ -60,6 +60,7 @@
6060
'verbose' => \$verbose,
6161
'very-verbose' => \$very_verbose,
6262
'output=s' => \$output_directory,
63+
'protobufs' => \$protobufs,
6364
'help' => \$help);
6465

6566
# Set verbose if very-verbose was chosen
@@ -564,6 +565,100 @@ sub check_column_for_fun {
564565
}
565566

566567
}
568+
569+
# If we want to do it the hard way...
570+
if($protobufs) {
571+
572+
# Let's test them all for protobufs and hope it don't die
573+
my $tmp_query = $base_query . "WHERE $column_name NOT NULL";
574+
575+
# Build and execute query
576+
my $tmp_query_handler = $local_dbh->prepare($tmp_query);
577+
$tmp_query_handler->execute();
578+
579+
# Loop over all rows returned
580+
while(my @tmp_row = $tmp_query_handler->fetchrow_array()) {
581+
my $tmp_primary_key;
582+
my $tmp_data_blob;
583+
my $file_type = "protobuf";
584+
585+
# Rip out the data
586+
if($primary_key_column) {
587+
$tmp_primary_key = $tmp_row[0];
588+
$tmp_data_blob = $tmp_row[1];
589+
} else {
590+
$tmp_data_blob = $tmp_row[0];
591+
}
592+
593+
# Create a file to hold the results of this
594+
open(TMP_OUTPUT, ">tmp_output.txt");
595+
print TMP_OUTPUT "$tmp_data_blob";
596+
close(TMP_OUTPUT);
597+
598+
# Check if protoc will read this
599+
my $result = `protoc --decode_raw < tmp_output.txt`;
600+
# Remove the file to be cleaner
601+
unlink "tmp_output.txt";
602+
603+
# If it starts with a number, it was able to parse it.
604+
if($result =~ /^\d+/) {
605+
print_log_line_if($log_file_handle, "\tProtobuf: Possibly found in $column_name ", $verbose);
606+
$total_identified_blobs += 1;
607+
count_mined_blob(File::Spec->abs2rel($file_name), $tmp_table_name, $column_name, $file_type);
608+
609+
if($primary_key_column) {
610+
print_log_line_if($log_file_handle, "when $primary_key_column=$tmp_primary_key\n", $verbose);
611+
} else {
612+
print_log_line_if($log_file_handle, "(no primary key)\n", $verbose);
613+
}
614+
615+
# Print out to the target CSV file
616+
(my $tmp_volume_for_output, my $tmp_directory_for_output, my $tmp_filename_for_output) = File::Spec->splitpath($file_name);
617+
print RESULT_OUTPUT "\"".File::Spec->abs2rel($tmp_directory_for_output)."\",".
618+
"\"$tmp_filename_for_output\",".
619+
"\"$tmp_table_name\",".
620+
"\"$column_name\",".
621+
"\"$primary_key_column\",".
622+
"\"$tmp_primary_key\",".
623+
"\"$file_type\"";
624+
625+
# Save out the blob if we're exporting files
626+
if($export_files) {
627+
628+
# Build the export filename (TABLE_COLUMN_[PRIMARYKEYCOLUMN_PRIMARYKEY].blob.EXTENSION)
629+
my $tmp_export_file_name = $tmp_table_name."-".$column_name;
630+
if($tmp_primary_key) {
631+
$tmp_export_file_name .= "-".$primary_key_column."-".$tmp_primary_key;
632+
}
633+
$tmp_export_file_name .= ".blob.protobuf";
634+
my $tmp_export_file_path = File::Spec->catfile($export_directory, $tmp_export_file_name);
635+
my $tmp_export_file_counter = 1;
636+
637+
# Keep looping until we're sure we have a unique file path
638+
while(-e $tmp_export_file_path) {
639+
$tmp_export_file_counter += 1;
640+
$tmp_export_file_path = File::Spec->catfile($export_directory, $tmp_export_file_name."_".$tmp_export_file_counter);
641+
}
642+
643+
# Export the file
644+
(my $tmp_export_volume_for_output, my $tmp_export_directory_for_output, my $tmp_export_filename_for_output) = File::Spec->splitpath($tmp_export_file_path);
645+
print_log_line_if($log_file_handle, "\tExporting file as $tmp_export_filename_for_output\n", $very_verbose);
646+
647+
# Save off the file
648+
open(EXPORT_OUTPUT, ">$tmp_export_file_path");
649+
binmode(EXPORT_OUTPUT);
650+
print EXPORT_OUTPUT $tmp_data_blob;
651+
close(EXPORT_OUTPUT);
652+
653+
# Record where we stored this
654+
print RESULT_OUTPUT ",\"$tmp_export_filename_for_output\"";
655+
}
656+
657+
# Close the line in the output file
658+
print RESULT_OUTPUT "\n";
659+
}
660+
}
661+
}
567662
}
568663

569664
# Function normalizes a table name

0 commit comments

Comments
 (0)