Skip to content

Commit ef7a6dd

Browse files
Phil Gooch
authored and committed
Adds ability to specify pdftotext options
* Adds test for `-table` option that produces tabular output
1 parent 50e973d commit ef7a6dd

File tree

4 files changed

+13
-4
lines changed

4 files changed

+13
-4
lines changed

lib/grim/page.rb

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,17 +40,18 @@ def save(path, options={})
4040
Grim.processor.save(@pdf, @index, path, options)
4141
end
4242

43-
# Extracts the text from the selected page.
43+
# Extracts the text from the selected page, using additional options.
4444
#
4545
# For example:
4646
#
4747
# pdf[1].text
4848
# # => "This is text from slide 2.\n\nAnd even more text from slide 2."
4949
#
50+
# pdf[1].text(["-table"])
5051
# Returns a String.
5152
#
52-
def text
53-
command = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, Shellwords.escape(@pdf.path), "-"].join(' ')
53+
def text(options=[])
54+
command = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, options.join(", "), Shellwords.escape(@pdf.path), "-"].join(' ')
5455
Grim.logger.debug { "Running pdftotext command" }
5556
Grim.logger.debug { command }
5657
`#{command}`

lib/grim/version.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# encoding: UTF-8
22
module Grim
3-
VERSION = "1.2.0" unless defined?(::Grim::VERSION)
3+
VERSION = "1.2.1" unless defined?(::Grim::VERSION)
44
end

spec/fixtures/table.pdf

64.9 KB
Binary file not shown.

spec/lib/grim/page_spec.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@
5151
eq("Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f")
5252
end
5353

54+
it "should extract tabular data with the -table option" do
55+
pdf = Grim::Pdf.new(fixture_path("table.pdf"))
56+
expect(pdf[0].text(options=["-table"])).to \
57+
include(
58+
" Male 979 (85) 968 (85)\n\n" +
59+
" Female 169 (15) 169 (15)\n")
60+
end
61+
5462
it "works with full path to pdftotext" do
5563
pdftotext_path = `which pdftotext`.chomp
5664
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"), pdftotext_path: pdftotext_path)

0 commit comments

Comments
 (0)