Skip to content

Commit 33c8588

Browse files
adds PDFBox2 thumbnail renderer; deletes JPedal
1 parent b340017 commit 33c8588

File tree

4 files changed

+36
-201
lines changed

4 files changed

+36
-201
lines changed

lib/jars/JPedal-LICENSE.txt

Lines changed: 0 additions & 165 deletions
This file was deleted.

lib/jars/jpedal_lgpl.jar

-2.79 MB
Binary file not shown.

lib/tabula_job_executor/jobs/generate_thumbnails.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def perform
1212
output_dir = options[:output_dir]
1313
thumbnail_sizes = options[:thumbnail_sizes]
1414

15-
generator = JPedalThumbnailGenerator.new(filepath, output_dir, thumbnail_sizes)
15+
generator = PDFBox2ThumbnailGenerator.new(filepath, output_dir, thumbnail_sizes)
1616
generator.add_observer(self, :at)
1717
generator.generate_thumbnails!
1818

lib/thumbnail_generator.rb

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
require 'java'
22
require 'observer'
33

4-
java.lang.System.setProperty('org.jpedal.jai', 'true')
5-
require_relative './jars/jpedal_lgpl.jar'
4+
require_relative '../lib/jars/tabula-1.0.0-SNAPSHOT-jar-with-dependencies.jar'
65

76
java_import javax.imageio.ImageIO
87
java_import java.awt.image.BufferedImage
98
java_import java.awt.Image
109

11-
java_import org.jpedal.PdfDecoder
12-
java_import org.jpedal.fonts.FontMappings
10+
java_import org.apache.pdfbox.rendering.PDFRenderer
11+
java_import org.apache.pdfbox.pdmodel.PDDocument
12+
java_import java.io.ByteArrayOutputStream
13+
1314

1415
class AbstractThumbnailGenerator
1516
include Observable
17+
SIZE = 800
1618

1719
def initialize(pdf_filename, output_directory, sizes=[2048, 560])
1820
raise Errno::ENOENT unless File.directory?(output_directory)
@@ -48,41 +50,38 @@ def generate_thumbnails!
4850
end
4951
end
5052

51-
class JPedalThumbnailGenerator < AbstractThumbnailGenerator
53+
# Thumbnail generator backed by PDFBox 2's PDFRenderer.
#
# Each page of the PDF is rasterised at RENDER_DPI, scaled to SIZE pixels
# wide (height follows the page's aspect ratio) and written to the output
# directory as "document_<SIZE>_<page>.png", with pages numbered from 1.
# Observers are notified after each page is written.
class PDFBox2ThumbnailGenerator < AbstractThumbnailGenerator
  # Resolution at which a page is rasterised before it is scaled down/up
  # to SIZE pixels wide.
  RENDER_DPI = 75

  # @param pdf_filename [String] path of the PDF to open
  # @param output_directory [String] directory that receives the PNG files
  # @param sizes [Array<Integer>] forwarded unchanged to the superclass
  #
  # NOTE(review): this generator renders a single SIZE-wide thumbnail per
  # page and does not consult `sizes`; the JPedal generator it replaces
  # produced one file per requested size. Confirm that this is intended.
  def initialize(pdf_filename, output_directory, sizes=[2048, 560])
    super(pdf_filename, output_directory, sizes)
    @pdf_document = PDDocument.load(java.io.File.new(pdf_filename))
  end

  # Renders every page to a PNG thumbnail and notifies observers with
  # (page_number, total_pages, message) after each page is written.
  #
  # The PDF document is closed when this method exits, including when
  # rendering or writing raises, so the generator is single-use.
  def generate_thumbnails!
    renderer = PDFRenderer.new(@pdf_document)
    total_pages = @pdf_document.get_number_of_pages

    total_pages.times do |page_index|
      image = renderer.render_image_with_dpi(page_index, RENDER_DPI)
      scaled_height = (image.height * (SIZE / image.width.to_f)).round

      thumbnail = BufferedImage.new(SIZE, scaled_height, image.type)
      graphics = thumbnail.get_graphics
      begin
        graphics.draw_image(image.get_scaled_instance(SIZE, scaled_height, Image::SCALE_SMOOTH), 0, 0, nil)
      ensure
        # Release the native resources held by the graphics context.
        graphics.dispose
      end

      filename = "document_#{SIZE}_#{page_index + 1}.png"
      ImageIO.write(thumbnail,
                    'png',
                    java.io.File.new(File.join(@output_directory, filename)))
      STDERR.puts "Writing page thumbnail #{filename}"

      # Observable#notify_observers is a no-op unless the changed flag is
      # set, so it has to be raised before every notification.
      changed
      notify_observers(page_index + 1, total_pages, "generating page thumbnails...")
    end
  ensure
    @pdf_document.close
  end
end
8887

@@ -95,7 +94,8 @@ def update(page, total_pages)
9594
end
9695

9796
#pdftg = JPedalThumbnailGenerator.new(ARGV[0], '/tmp', [560])
98-
pdftg = MUDrawThumbnailGenerator.new(ARGV[0], '/tmp', [560])
97+
# pdftg = MUDrawThumbnailGenerator.new(ARGV[0], '/tmp', [560])
98+
pdftg = PDFBox2ThumbnailGenerator.new(ARGV[0], '/tmp', [560])
9999
pdftg.add_observer(STDERRProgressReporter.new)
100100
pdftg.generate_thumbnails!
101101
end

0 commit comments

Comments
 (0)