Skip to content

Commit a63a444

Browse files
authored
Merge pull request #31 from contentinnovation/add-tables
Adds ability to specify pdftotext options
2 parents 50e973d + e6bf3a8 commit a63a444

File tree

4 files changed

+17
-4
lines changed

4 files changed

+17
-4
lines changed

lib/grim/page.rb

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,22 @@ def save(path, options={})
4040
Grim.processor.save(@pdf, @index, path, options)
4141
end
4242

43-
# Extracts the text from the selected page.
43+
# Extracts the text from the selected page, optionally passing extra flags (options[:flags]) to pdftotext.
4444
#
4545
# For example:
4646
#
4747
# pdf[1].text
4848
# # => "This is text from slide 2.\n\nAnd even more text from slide 2."
4949
#
50+
# pdf[1].text({flags: ["-table"]})
5051
# Returns a String.
5152
#
52-
def text
53-
command = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, Shellwords.escape(@pdf.path), "-"].join(' ')
53+
def text(options={})
54+
flags = options.fetch(:flags, [])
55+
command_parts = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number]
56+
command_parts += flags if flags.length > 0
57+
command_parts += [Shellwords.escape(@pdf.path), "-"]
58+
command = command_parts.join(' ')
5459
Grim.logger.debug { "Running pdftotext command" }
5560
Grim.logger.debug { command }
5661
`#{command}`

lib/grim/version.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# encoding: UTF-8
22
module Grim
3-
VERSION = "1.2.0" unless defined?(::Grim::VERSION)
3+
VERSION = "1.2.1" unless defined?(::Grim::VERSION)
44
end

spec/fixtures/table.pdf

64.9 KB
Binary file not shown.

spec/lib/grim/page_spec.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@
5151
eq("Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f")
5252
end
5353

54+
it "should extract tabular data with the -table option" do
55+
pdf = Grim::Pdf.new(fixture_path("table.pdf"))
56+
expect(pdf[0].text({flags: ["-table"]})).to \
57+
include(
58+
" Male 979 (85) 968 (85)\n\n" +
59+
" Female 169 (15) 169 (15)\n")
60+
end
61+
5462
it "works with full path to pdftotext" do
5563
pdftotext_path = `which pdftotext`.chomp
5664
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"), pdftotext_path: pdftotext_path)

0 commit comments

Comments
 (0)