11require 'java'
22require 'observer'
33
4- java . lang . System . setProperty ( 'org.jpedal.jai' , 'true' )
5- require_relative './jars/jpedal_lgpl.jar'
4+ require_relative '../lib/jars/tabula-1.0.0-SNAPSHOT-jar-with-dependencies.jar'
65
76java_import javax . imageio . ImageIO
87java_import java . awt . image . BufferedImage
98java_import java . awt . Image
109
11- java_import org . jpedal . PdfDecoder
12- java_import org . jpedal . fonts . FontMappings
10+ java_import org . apache . pdfbox . rendering . PDFRenderer
11+ java_import org . apache . pdfbox . pdmodel . PDDocument
12+ java_import java . io . ByteArrayOutputStream
13+
1314
1415class AbstractThumbnailGenerator
1516 include Observable
17+ SIZE = 800
1618
1719 def initialize ( pdf_filename , output_directory , sizes = [ 2048 , 560 ] )
1820 raise Errno ::ENOENT unless File . directory? ( output_directory )
@@ -48,41 +50,38 @@ def generate_thumbnails!
4850 end
4951end
5052
# Thumbnail generator backed by Apache PDFBox 2's PDFRenderer.
#
# Renders every page of the PDF at 75 DPI, rescales it to SIZE pixels wide
# (preserving aspect ratio) and writes it to
# <output_directory>/document_<SIZE>_<page>.png, where <page> is 1-based.
#
# NOTE(review): the `sizes` argument is forwarded to the superclass for
# interface compatibility, but this generator does not read it; only a single
# SIZE-wide thumbnail is produced per page.
class PDFBox2ThumbnailGenerator < AbstractThumbnailGenerator
  # pdf_filename     - path of the PDF to open.
  # output_directory - directory that receives the PNG files.
  # sizes            - passed through to the superclass unchanged.
  def initialize(pdf_filename, output_directory, sizes = [2048, 560])
    super(pdf_filename, output_directory, sizes)
    @pdf_document = PDDocument.load(java.io.File.new(pdf_filename))
  end

  # Writes one PNG per page and notifies observers with
  # (page_number, total_pages, message) after each page.
  # The PDF document is closed when this method returns or raises.
  def generate_thumbnails!
    renderer = PDFRenderer.new(@pdf_document)
    total_pages = @pdf_document.get_number_of_pages

    total_pages.times do |page_index|
      image = renderer.render_image_with_dpi(page_index, 75)

      # Scale to a fixed width, keeping the page's aspect ratio.
      scale = SIZE / image.width.to_f
      scaled_height = (image.height * scale).round

      bi = BufferedImage.new(SIZE, scaled_height, image.type)
      bi.get_graphics.draw_image(
        image.get_scaled_instance(SIZE, scaled_height, Image::SCALE_SMOOTH),
        0, 0, nil
      )

      filename = "document_#{SIZE}_#{page_index + 1}.png"
      ImageIO.write(bi,
                    'png',
                    java.io.File.new(File.join(@output_directory, filename)))
      STDERR.puts "Writing page thumbnail #{filename}"

      # Observable#notify_observers is a no-op unless the changed flag is set,
      # so it must be raised before every notification.
      changed
      notify_observers(page_index + 1, total_pages, "generating page thumbnails...")
    end
  ensure
    # Release the underlying file even if rendering or writing fails.
    @pdf_document.close
  end
end
8887
@@ -95,7 +94,8 @@ def update(page, total_pages)
9594 end
9695
9796 #pdftg = JPedalThumbnailGenerator.new(ARGV[0], '/tmp', [560])
98- pdftg = MUDrawThumbnailGenerator . new ( ARGV [ 0 ] , '/tmp' , [ 560 ] )
97+ # pdftg = MUDrawThumbnailGenerator.new(ARGV[0], '/tmp', [560])
98+ pdftg = PDFBox2ThumbnailGenerator . new ( ARGV [ 0 ] , '/tmp' , [ 560 ] )
9999 pdftg . add_observer ( STDERRProgressReporter . new )
100100 pdftg . generate_thumbnails!
101101end
0 commit comments