Skip to content

Commit 56b1871

Browse files
authored
improve error handling, add main and app (#11)
* wip * rm script * get tests passing * wip * wip * format * up * fix: update function parameters for make_pdf and unite_many_pdfs to include exit_on_error * rm unpaper * up * up * up
1 parent 9bdab46 commit 56b1871

File tree

7 files changed

+254
-272
lines changed

7 files changed

+254
-272
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,12 @@ jobs:
2424
arch:
2525
- x64
2626
steps:
27-
- uses: actions/checkout@v2
28-
- uses: julia-actions/setup-julia@v1
27+
- uses: actions/checkout@v4
28+
- uses: julia-actions/setup-julia@v2
2929
with:
3030
version: ${{ matrix.version }}
3131
arch: ${{ matrix.arch }}
32-
- uses: actions/cache@v1
33-
env:
34-
cache-name: cache-artifacts
35-
with:
36-
path: ~/.julia/artifacts
37-
key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
38-
restore-keys: |
39-
${{ runner.os }}-test-${{ env.cache-name }}-
40-
${{ runner.os }}-test-
41-
${{ runner.os }}-
32+
- uses: julia-actions/cache@v2
4233
- uses: julia-actions/julia-buildpkg@v1
4334
- uses: julia-actions/julia-runtest@v1
4435
- uses: julia-actions/julia-processcoverage@v1

Project.toml

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,30 @@ version = "0.1.0"
55

66
[deps]
77
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
8+
DocOpt = "968ba79b-81e4-546f-ab3a-2eecfa62a9db"
9+
OutputCollectors = "6c11c7d4-943b-4e2b-80de-f2cfc2930a8c"
810
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
911
Poppler_jll = "9c32591e-4766-534b-9725-b71a8799265b"
1012
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
1113
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1214
Scratch = "6c6a2e73-6563-6170-7368-637461726353"
1315
Tesseract_jll = "efd95c89-babc-5260-8753-618084eaf9d7"
14-
unpaper_jll = "d52248c9-e08a-51c2-9066-05d0bf3e6245"
1516

1617
[compat]
17-
Aqua = "0.5"
18+
Aqua = "0.8"
1819
CSV = "0.10"
20+
DocOpt = "0.5.0"
21+
OutputCollectors = "0.1.1"
22+
Pkg = "1"
1923
Poppler_jll = "21.9"
2024
ProgressMeter = "1.5"
25+
Random = "1"
2126
Scratch = "1"
2227
Tesseract_jll = "4.1.100"
23-
julia = "1.6"
24-
unpaper_jll = "6.1.100"
28+
Test = "1"
29+
julia = "1.10"
30+
31+
[apps.searchable-pdf]
2532

2633
[extras]
2734
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"

README.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ It more-or-less works on MacOS (both Intel and Apple Silicon) and Linux.
3232
Next steps:
3333

3434
* Allow choice of training data used for tesseract
35-
* Look at what settings should be used for `unpaper`
3635
* Robustify and test on more files
3736
* Add better tests?
3837

@@ -42,6 +41,11 @@ Next steps:
4241
using SearchablePDFs
4342
file = ocr("test/test_rasterized.pdf")
4443
```
45-
or use `searchable`.
4644

47-
TODO- CLI using `@main`.
45+
Supports `@main` and, on Julia v1.12, an app named `searchable-pdf`.
46+
47+
If you use [`juliaup`](https://github.com/JuliaLang/juliaup), you can install Julia 1.12 with `juliaup add nightly`, then run
48+
```sh
49+
julia +nightly --startup-file=no -e 'using Pkg; Pkg.Apps.add(url="https://github.com/ericphanson/SearchablePDFs.jl")'
50+
```
51+
to install a CLI executable `searchable-pdf`. You can re-run this to update it.

cli/build.jl

Lines changed: 0 additions & 2 deletions
This file was deleted.

format/Manifest.toml

Lines changed: 51 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -1,217 +1,117 @@
11
# This file is machine-generated - editing it directly is not advised
22

3-
[[ArgTools]]
4-
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
5-
6-
[[Artifacts]]
7-
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
8-
93
[[Base64]]
104
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
5+
version = "1.11.0"
116

127
[[CSTParser]]
138
deps = ["Tokenize"]
14-
git-tree-sha1 = "60e9121d9ea044c30a04397e59b00c5d9eb826ee"
9+
git-tree-sha1 = "0157e592151e39fa570645e2b2debcdfb8a0f112"
1510
uuid = "00ebfdb7-1f24-5e51-bd34-a7502290713f"
16-
version = "2.5.0"
11+
version = "3.4.3"
1712

1813
[[CommonMark]]
19-
deps = ["Crayons", "JSON", "URIs"]
20-
git-tree-sha1 = "1bb620c8e76625df66585bec1d85c24c9575d230"
14+
deps = ["Crayons", "PrecompileTools"]
15+
git-tree-sha1 = "3faae67b8899797592335832fccf4b3c80bb04fa"
2116
uuid = "a80b9123-70ca-4bc0-993e-6e3bcb318db6"
22-
version = "0.7.3"
17+
version = "0.8.15"
2318

2419
[[Compat]]
25-
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
26-
git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b"
20+
deps = ["TOML", "UUIDs"]
21+
git-tree-sha1 = "8ae8d32e09f0dcf42a36b90d4e17f5dd2e4c4215"
2722
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
28-
version = "3.25.0"
23+
version = "4.16.0"
24+
25+
[Compat.extensions]
26+
CompatLinearAlgebraExt = "LinearAlgebra"
27+
28+
[Compat.weakdeps]
29+
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
30+
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
2931

3032
[[Crayons]]
31-
git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d"
33+
git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15"
3234
uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
33-
version = "4.0.4"
35+
version = "4.1.1"
3436

3537
[[DataStructures]]
3638
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
37-
git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677"
39+
git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82"
3840
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
39-
version = "0.18.9"
41+
version = "0.18.20"
4042

4143
[[Dates]]
4244
deps = ["Printf"]
4345
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
46+
version = "1.11.0"
4447

45-
[[DelimitedFiles]]
46-
deps = ["Mmap"]
47-
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
48-
49-
[[Distributed]]
50-
deps = ["Random", "Serialization", "Sockets"]
51-
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
52-
53-
[[DocStringExtensions]]
54-
deps = ["LibGit2", "Markdown", "Pkg", "Test"]
55-
git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1"
56-
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
57-
version = "0.8.3"
58-
59-
[[Documenter]]
60-
deps = ["Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"]
61-
git-tree-sha1 = "3ebb967819b284dc1e3c0422229b58a40a255649"
62-
uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
63-
version = "0.26.3"
64-
65-
[[Downloads]]
66-
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
67-
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
68-
69-
[[IOCapture]]
70-
deps = ["Logging"]
71-
git-tree-sha1 = "377252859f740c217b936cebcd918a44f9b53b59"
72-
uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
73-
version = "0.1.1"
48+
[[Glob]]
49+
git-tree-sha1 = "97285bbd5230dd766e9ef6749b80fc617126d496"
50+
uuid = "c27321d9-0574-5035-807b-f59d2c89b15c"
51+
version = "1.3.1"
7452

7553
[[InteractiveUtils]]
7654
deps = ["Markdown"]
7755
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
78-
79-
[[JSON]]
80-
deps = ["Dates", "Mmap", "Parsers", "Unicode"]
81-
git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4"
82-
uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
83-
version = "0.21.1"
56+
version = "1.11.0"
8457

8558
[[JuliaFormatter]]
86-
deps = ["CSTParser", "CommonMark", "DataStructures", "Documenter", "Pkg", "Tokenize"]
87-
git-tree-sha1 = "7a86d3ba7307888497dd08611a345f61c0fb51c7"
59+
deps = ["CSTParser", "CommonMark", "DataStructures", "Glob", "PrecompileTools", "TOML", "Tokenize"]
60+
git-tree-sha1 = "59cf7ad64f1b0708a4fa4369879d33bad3239b56"
8861
uuid = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
89-
version = "0.13.4"
90-
91-
[[LibCURL]]
92-
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
93-
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
94-
95-
[[LibCURL_jll]]
96-
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
97-
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
98-
99-
[[LibGit2]]
100-
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
101-
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
102-
103-
[[LibSSH2_jll]]
104-
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
105-
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
106-
107-
[[Libdl]]
108-
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
109-
110-
[[LinearAlgebra]]
111-
deps = ["Libdl"]
112-
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
113-
114-
[[Logging]]
115-
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
62+
version = "1.0.62"
11663

11764
[[Markdown]]
11865
deps = ["Base64"]
11966
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
120-
121-
[[MbedTLS_jll]]
122-
deps = ["Artifacts", "Libdl"]
123-
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
124-
125-
[[Mmap]]
126-
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
127-
128-
[[MozillaCACerts_jll]]
129-
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
130-
131-
[[NetworkOptions]]
132-
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
67+
version = "1.11.0"
13368

13469
[[OrderedCollections]]
135-
git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf"
70+
git-tree-sha1 = "cc4054e898b852042d7b503313f7ad03de99c3dd"
13671
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
137-
version = "1.4.0"
72+
version = "1.8.0"
13873

139-
[[Parsers]]
140-
deps = ["Dates"]
141-
git-tree-sha1 = "223a825cccef2228f3fdbf2ecc7ca93363059073"
142-
uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
143-
version = "1.0.16"
74+
[[PrecompileTools]]
75+
deps = ["Preferences"]
76+
git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f"
77+
uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
78+
version = "1.2.1"
14479

145-
[[Pkg]]
146-
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs"]
147-
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
80+
[[Preferences]]
81+
deps = ["TOML"]
82+
git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6"
83+
uuid = "21216c6a-2e73-6563-6e65-726566657250"
84+
version = "1.4.3"
14885

14986
[[Printf]]
15087
deps = ["Unicode"]
15188
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
152-
153-
[[REPL]]
154-
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
155-
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
89+
version = "1.11.0"
15690

15791
[[Random]]
158-
deps = ["Serialization"]
92+
deps = ["SHA"]
15993
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
94+
version = "1.11.0"
16095

16196
[[SHA]]
16297
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
163-
164-
[[Serialization]]
165-
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
166-
167-
[[SharedArrays]]
168-
deps = ["Distributed", "Mmap", "Random", "Serialization"]
169-
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
170-
171-
[[Sockets]]
172-
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
173-
174-
[[SparseArrays]]
175-
deps = ["LinearAlgebra", "Random"]
176-
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
177-
178-
[[Statistics]]
179-
deps = ["LinearAlgebra", "SparseArrays"]
180-
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
98+
version = "0.7.0"
18199

182100
[[TOML]]
183101
deps = ["Dates"]
184102
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
185-
186-
[[Tar]]
187-
deps = ["ArgTools", "SHA"]
188-
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
189-
190-
[[Test]]
191-
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
192-
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
103+
version = "1.0.3"
193104

194105
[[Tokenize]]
195-
git-tree-sha1 = "45b1932b0ec576159181bf75df71d6d86aa9c850"
106+
git-tree-sha1 = "468b4685af4abe0e9fd4d7bf495a6554a6276e75"
196107
uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624"
197-
version = "0.5.13"
198-
199-
[[URIs]]
200-
git-tree-sha1 = "7855809b88d7b16e9b029afd17880930626f54a2"
201-
uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
202-
version = "1.2.0"
108+
version = "0.5.29"
203109

204110
[[UUIDs]]
205111
deps = ["Random", "SHA"]
206112
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
113+
version = "1.11.0"
207114

208115
[[Unicode]]
209116
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
210-
211-
[[Zlib_jll]]
212-
deps = ["Libdl"]
213-
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
214-
215-
[[nghttp2_jll]]
216-
deps = ["Artifacts", "Libdl"]
217-
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
117+
version = "1.11.0"

0 commit comments

Comments
 (0)