7272
7373# There's gotta be a better way...
# Returns the page count of `pdf` as an `Int`.
# Runs Poppler's `pdfinfo` on `pdf`, captures its standard output as a `String`,
# and parses the integer that follows `Pages:`.
# Throws an error if the output contains no `Pages:` entry followed by digits.
function num_pages(pdf)
    result = read(`$(Poppler_jll.pdfinfo()) $pdf`, String)
    # Require at least one digit so an empty capture is rejected here instead of
    # surfacing later as a confusing `parse` failure.
    m = match(r"Pages\:\s*([0-9]+)", result)
    # `match` returns `nothing` when the pattern is absent; fail with a clear message.
    m === nothing && error("could not find a page count in the `pdfinfo` output for $pdf")
    return parse(Int, m.captures[1])
end
8583
8684# Use Poppler to extract the image
8785function get_images (pdf, page_range:: UnitRange{Int} , tmp, total_pages)
88- local logs
89- Poppler_jll. pdftoppm () do pdftoppm
90- return logs = run_and_collect_logs (` $pdftoppm -f $(first (page_range)) -l $(last (page_range)) $pdf -tiff -forcenum $(tmp) /page` )
91- end
86+ logs = run_and_collect_logs (` $(Poppler_jll. pdftoppm ()) -f $(first (page_range)) -l $(last (page_range)) $pdf -tiff -forcenum $(tmp) /page` )
9287 @debug " `pdftoppm`" logs
9388 paths = [joinpath (tmp, string (" page-" , lpad (page, ndigits (total_pages), ' 0' ), " .tif" ))
9489 for page in page_range]
9792
9893# Clean up an image with unpaper
# Invokes the `unpaper` binary with `img` as the first argument and a sibling
# path `<base>_unpaper<ext>` (derived from `img` via `splitext`) as the second.
# Returns a named tuple `(; img_unpaper, logs)`: `img_unpaper` is the derived
# path, and `logs` is the result of `run_and_collect_logs` splatted into a
# named tuple that starts with `binary = "unpaper"`.
function unpaper(img)
    img_base, img_ext = splitext(img)
    img_unpaper = img_base * "_unpaper" * img_ext
    # Calling the JLL wrapper with no arguments yields a command that can be
    # interpolated directly, so no `do` block or `local logs` is needed.
    logs = run_and_collect_logs(`$(unpaper_jll.unpaper()) $img $img_unpaper`)
    return (; img_unpaper, logs=(; binary="unpaper", logs...))
end
108100
@@ -114,15 +106,11 @@ function make_pdf(img; tesseract_nthreads)
114106 data_path = get_data_path () * " /"
115107 img_base, img_ext = splitext (img)
116108 output = img_base
117- local logs
118- withenv (" OMP_THREAD_LIMIT" => tesseract_nthreads) do
119- Tesseract_jll. tesseract () do tesseract
120- cmd = ` $tesseract -l eng+equ --tessdata-dir $data_path $img $output -c tessedit_create_pdf=1`
121- @debug " Tesseracting!" img
122- logs = run_and_collect_logs (cmd)
123- @debug logs
124- end
125- end
109+ tesseract = addenv (Tesseract_jll. tesseract (), " OMP_THREAD_LIMIT" => tesseract_nthreads)
110+ cmd = ` $tesseract -l eng+equ --tessdata-dir $data_path $img $output -c tessedit_create_pdf=1`
111+ @debug " Tesseracting!" img
112+ logs = run_and_collect_logs (cmd)
113+ @debug logs
126114 return (; pdf= output * " .pdf" , logs= (; binary= " tesseract" , logs... ))
127115end
128116
131119# ####
132120
# Invokes Poppler's `pdfunite` with `pdfs` followed by `output` as arguments.
# When `pdfs` is a collection, command interpolation expands it to one argument
# per element.
# Returns the result of `run_and_collect_logs` splatted into a named tuple that
# starts with `binary = "pdfunite"`.
function unite_pdfs(pdfs, output)
    logs = run_and_collect_logs(`$(Poppler_jll.pdfunite()) $pdfs $output`)
    return (; binary="pdfunite", logs...)
end
140125
0 commit comments