-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathocr.pl
More file actions
34 lines (29 loc) · 971 Bytes
/
ocr.pl
File metadata and controls
34 lines (29 loc) · 971 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/env perl
use strict;
use warnings;
use lib 'lib';
use PageUp::JSON;
use PageUp::Util;
use File::Basename;
# Run tesseract on the file given as first argument and record the OCR
# timestamps and the recognised text in the document's JSON metadata.
#
# Usage: ocr.pl <file>
my $fileName = $ARGV[0];
die "Usage: $0 <file-to-ocr>\n"
    unless defined $fileName && length $fileName;

# Only the base name (no directory, no extension) is used below, so the JSON
# metadata file and tesseract's temporary output are both resolved relative
# to the current working directory.
my ($file) = fileparse($fileName, qr/\.[^.]*/);
my $jsonFile = "${file}.json";
my $textFile = "${file}.txt";

# Create JSON file if not existing (should not happen at this stage though)
if (! -e $jsonFile) {
    PageUp::JSON::createMetaFile($jsonFile);
}

# OCR the file. tesseract is exec'ed directly with an argument list (no
# shell), so file names containing spaces or shell metacharacters are passed
# through verbatim and cannot inject commands. The child silences its own
# stdout/stderr, mirroring the former "> /dev/null 2>&1".
my $tesseract  = '/usr/local/bin/tesseract';
my $datebefore = PageUp::Util::getCurrentTime();
my $pid = fork();
die "Cannot fork to run $tesseract: $!\n" unless defined $pid;
if ($pid == 0) {
    open STDOUT, '>', '/dev/null' or exit 126;
    open STDERR, '>', '/dev/null' or exit 126;
    exec { $tesseract } $tesseract, $fileName, $file;
    exit 127;    # only reached when exec itself failed
}
waitpid($pid, 0);
my $status = $?;
my $dateafter = PageUp::Util::getCurrentTime();

# Surface OCR failures instead of silently storing an empty text; the
# metadata is still written so the rest of the flow behaves as before.
if ($status != 0) {
    warn sprintf("%s failed on '%s' (exit code %d, signal %d)\n",
        $tesseract, $fileName, $status >> 8, $status & 127);
}

# Slurp tesseract's output as raw bytes; a missing output file yields an
# empty text.
my $text = '';
if (open my $fh, '<', $textFile) {
    local $/;
    $text = <$fh>;
    $text = '' unless defined $text;
    close $fh;
}
else {
    warn "Cannot read OCR output '$textFile': $!\n";
}

# Add info in the JSON file
# NOTE(review): addOrModifyMeta receives the bare base name while
# createMetaFile receives "<base>.json" -- presumably the module appends the
# extension itself; confirm against PageUp::JSON.
PageUp::JSON::addOrModifyMeta($file, "ocr-date-before", $datebefore);
PageUp::JSON::addOrModifyMeta($file, "ocr-date-after", $dateafter);
PageUp::JSON::addOrModifyMeta($file, "ocr-text", $text);

# Clean up the temporary text file produced by tesseract
if (-e $textFile) {
    unlink $textFile or warn "Cannot remove '$textFile': $!\n";
}