Skip to content

Commit 9890777

Browse files
authored
don't mutate the global environment from a task (#6)
1 parent 8d1cb9d commit 9890777

File tree

1 file changed

+9
-24
lines changed

1 file changed

+9
-24
lines changed

src/SearchablePDFs.jl

Lines changed: 9 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -72,9 +72,7 @@ end
72 72

73 73
# There's gotta be a better way...
74 74
function num_pages(pdf)
75-
result = Poppler_jll.pdfinfo() do pdfinfo
76-
return read(`$pdfinfo $pdf`, String)
77-
end
75+
result = read(`$(Poppler_jll.pdfinfo()) $pdf`, String)
78 76
m = match(r"Pages\:\s*([0-9]*)", result)
79 77
return parse(Int, m.captures[1])
80 78
end
@@ -85,10 +83,7 @@ end
85 83

86 84
# Use Poppler to extract the image
87 85
function get_images(pdf, page_range::UnitRange{Int}, tmp, total_pages)
88-
local logs
89-
Poppler_jll.pdftoppm() do pdftoppm
90-
return logs = run_and_collect_logs(`$pdftoppm -f $(first(page_range)) -l $(last(page_range)) $pdf -tiff -forcenum $(tmp)/page`)
91-
end
86+
logs = run_and_collect_logs(`$(Poppler_jll.pdftoppm()) -f $(first(page_range)) -l $(last(page_range)) $pdf -tiff -forcenum $(tmp)/page`)
92 87
@debug "`pdftoppm`" logs
93 88
paths = [joinpath(tmp, string("page-", lpad(page, ndigits(total_pages), '0'), ".tif"))
94 89
for page in page_range]
@@ -97,12 +92,9 @@ end
97 92

98 93
# Clean up an image with unpaper
99 94
function unpaper(img)
100-
local logs
101 95
img_base, img_ext = splitext(img)
102 96
img_unpaper = img_base * "_unpaper" * img_ext
103-
unpaper_jll.unpaper() do unpaper
104-
return logs = run_and_collect_logs(`$unpaper $img $img_unpaper`)
105-
end
97+
logs = run_and_collect_logs(`$(unpaper_jll.unpaper()) $img $img_unpaper`)
106 98
return (; img_unpaper, logs=(; binary="unpaper", logs...))
107 99
end
108 100

@@ -114,15 +106,11 @@ function make_pdf(img; tesseract_nthreads)
114 106
data_path = get_data_path() * "/"
115 107
img_base, img_ext = splitext(img)
116 108
output = img_base
117-
local logs
118-
withenv("OMP_THREAD_LIMIT" => tesseract_nthreads) do
119-
Tesseract_jll.tesseract() do tesseract
120-
cmd = `$tesseract -l eng+equ --tessdata-dir $data_path $img $output -c tessedit_create_pdf=1`
121-
@debug "Tesseracting!" img
122-
logs = run_and_collect_logs(cmd)
123-
@debug logs
124-
end
125-
end
109+
tesseract = addenv(Tesseract_jll.tesseract(), "OMP_THREAD_LIMIT" => tesseract_nthreads)
110+
cmd = `$tesseract -l eng+equ --tessdata-dir $data_path $img $output -c tessedit_create_pdf=1`
111+
@debug "Tesseracting!" img
112+
logs = run_and_collect_logs(cmd)
113+
@debug logs
126 114
return (; pdf=output * ".pdf", logs=(; binary="tesseract", logs...))
127 115
end
128 116

@@ -131,10 +119,7 @@ end
131 119
#####
132 120

133 121
function unite_pdfs(pdfs, output)
134-
local logs
135-
Poppler_jll.pdfunite() do pdfunite
136-
return logs = run_and_collect_logs(`$pdfunite $pdfs $output`)
137-
end
122+
logs = run_and_collect_logs(`$(Poppler_jll.pdfunite()) $pdfs $output`)
138 123
return (; binary="pdfunite", logs...)
139 124
end
140 125

0 commit comments

Comments
 (0)