Skip to content

Commit d5f9325

Browse files
kbafinkf
authored and committed
processors: use make_file_id and assert_file_grp_cardinality
1 parent 3c5b23e commit d5f9325

File tree

8 files changed

+49
-75
lines changed

8 files changed

+49
-75
lines changed

ocrd_cis/ocropy/binarize.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99

1010
from ocrd_utils import (
1111
getLogger,
12-
concat_padded,
12+
make_file_id,
13+
assert_file_grp_cardinality,
1314
MIMETYPE_PAGE
1415
)
1516
from ocrd_modelfactory import page_from_file
@@ -104,15 +105,12 @@ def process(self):
104105
Produce a new output file by serialising the resulting hierarchy.
105106
"""
106107
level = self.parameter['level-of-operation']
107-
assert len(self.output_file_grp.split(',')) == 1, \
108-
"Expected exactly one output file group, but '%s' has %d" % (
109-
self.output_file_grp, len(self.output_file_grp.split(',')))
108+
assert_file_grp_cardinality(self.input_file_grp, 1)
109+
assert_file_grp_cardinality(self.output_file_grp, 1)
110110

111111
for (n, input_file) in enumerate(self.input_files):
112112
LOG.info("INPUT FILE %i / %s", n, input_file.pageId or input_file.ID)
113-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
114-
if file_id == input_file.ID:
115-
file_id = concat_padded(self.output_file_grp, n)
113+
file_id = make_file_id(input_file, self.output_file_grp)
116114

117115
pcgts = page_from_file(self.workspace.download_file(input_file))
118116
page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID # (PageType has no id)
@@ -160,9 +158,6 @@ def process(self):
160158
file_id + '_' + region.id + '_' + line.id)
161159

162160
# update METS (add the PAGE file):
163-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
164-
if file_id == input_file.ID:
165-
file_id = concat_padded(self.output_file_grp, n)
166161
file_path = os.path.join(self.output_file_grp, file_id + '.xml')
167162
out = self.workspace.add_file(
168163
ID=file_id,

ocrd_cis/ocropy/clip.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
from ocrd import Processor
1717
from ocrd_utils import (
1818
getLogger,
19-
concat_padded,
19+
make_file_id,
20+
assert_file_grp_cardinality,
2021
coordinates_of_segment,
2122
polygon_from_points,
2223
bbox_from_polygon,
@@ -80,15 +81,12 @@ def process(self):
8081
# deskewing, because that would make segments incommensurable with their
8182
# neighbours.
8283
level = self.parameter['level-of-operation']
83-
assert len(self.output_file_grp.split(',')) == 1, \
84-
"Expected exactly one output file group, but '%s' has %d" % (
85-
self.output_file_grp, len(self.output_file_grp.split(',')))
84+
assert_file_grp_cardinality(self.input_file_grp, 1)
85+
assert_file_grp_cardinality(self.output_file_grp, 1)
8686

8787
for (n, input_file) in enumerate(self.input_files):
8888
LOG.info("INPUT FILE %i / %s", n, input_file.pageId or input_file.ID)
89-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
90-
if file_id == input_file.ID:
91-
file_id = concat_padded(self.output_file_grp, n)
89+
file_id = make_file_id(input_file, self.output_file_grp)
9290

9391
pcgts = page_from_file(self.workspace.download_file(input_file))
9492
page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID # (PageType has no id)
@@ -213,9 +211,6 @@ def process(self):
213211
input_file.pageId, file_id + '_' + region.id + '_' + line.id)
214212

215213
# update METS (add the PAGE file):
216-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
217-
if file_id == input_file.ID:
218-
file_id = concat_padded(self.output_file_grp, n)
219214
file_path = os.path.join(self.output_file_grp, file_id + '.xml')
220215
out = self.workspace.add_file(
221216
ID=file_id,

ocrd_cis/ocropy/denoise.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
from ocrd_utils import (
66
getLogger,
7-
concat_padded,
7+
make_file_id,
8+
assert_file_grp_cardinality,
89
MIMETYPE_PAGE
910
)
1011
from ocrd_modelfactory import page_from_file
@@ -53,15 +54,12 @@ def process(self):
5354
Produce a new output file by serialising the resulting hierarchy.
5455
"""
5556
level = self.parameter['level-of-operation']
56-
assert len(self.output_file_grp.split(',')) == 1, \
57-
"Expected exactly one output file group, but '%s' has %d" % (
58-
self.output_file_grp, len(self.output_file_grp.split(',')))
57+
assert_file_grp_cardinality(self.input_file_grp, 1)
58+
assert_file_grp_cardinality(self.output_file_grp, 1)
5959

6060
for (n, input_file) in enumerate(self.input_files):
6161
LOG.info("INPUT FILE %i / %s", n, input_file.pageId or input_file.ID)
62-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
63-
if file_id == input_file.ID:
64-
file_id = concat_padded(self.output_file_grp, n)
62+
file_id = make_file_id(input_file, self.output_file_grp)
6563

6664
pcgts = page_from_file(self.workspace.download_file(input_file))
6765
page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID # (PageType has no id)
@@ -122,9 +120,6 @@ def process(self):
122120
file_id + '_' + region.id + '_' + line.id)
123121

124122
# update METS (add the PAGE file):
125-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
126-
if file_id == input_file.ID:
127-
file_id = concat_padded(self.output_file_grp, n)
128123
file_path = os.path.join(self.output_file_grp, file_id + '.xml')
129124
out = self.workspace.add_file(
130125
ID=file_id,

ocrd_cis/ocropy/deskew.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import os.path
44

55
from ocrd_utils import (
6-
getLogger, concat_padded,
6+
getLogger,
7+
make_file_id,
8+
assert_file_grp_cardinality,
79
rotate_image,
810
MIMETYPE_PAGE
911
)
@@ -58,15 +60,12 @@ def process(self):
5860
Produce a new output file by serialising the resulting hierarchy.
5961
"""
6062
level = self.parameter['level-of-operation']
61-
assert len(self.output_file_grp.split(',')) == 1, \
62-
"Expected exactly one output file group, but '%s' has %d" % (
63-
self.output_file_grp, len(self.output_file_grp.split(',')))
63+
assert_file_grp_cardinality(self.input_file_grp, 1)
64+
assert_file_grp_cardinality(self.output_file_grp, 1)
6465

6566
for (n, input_file) in enumerate(self.input_files):
6667
LOG.info("INPUT FILE %i / %s", n, input_file.pageId or input_file.ID)
67-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
68-
if file_id == input_file.ID:
69-
file_id = concat_padded(self.output_file_grp, n)
68+
file_id = make_file_id(input_file, self.output_file_grp)
7069

7170
pcgts = page_from_file(self.workspace.download_file(input_file))
7271
page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID # (PageType has no id)
@@ -112,9 +111,6 @@ def process(self):
112111
file_id + '_' + region.id)
113112

114113
# update METS (add the PAGE file):
115-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
116-
if file_id == input_file.ID:
117-
file_id = concat_padded(self.output_file_grp, n)
118114
file_path = os.path.join(self.output_file_grp, file_id + '.xml')
119115
out = self.workspace.add_file(
120116
ID=file_id,

ocrd_cis/ocropy/dewarp.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
import os.path
44
import numpy as np
55

6-
from ocrd_utils import getLogger, concat_padded
6+
from ocrd_utils import (
7+
getLogger,
8+
make_file_id,
9+
assert_file_grp_cardinality,
10+
)
711
from ocrd_modelfactory import page_from_file
812
from ocrd_models.ocrd_page import (
913
MetadataItemType,
@@ -96,15 +100,12 @@ def process(self):
96100
97101
Produce a new output file by serialising the resulting hierarchy.
98102
"""
99-
assert len(self.output_file_grp.split(',')) == 1, \
100-
"Expected exactly one output file group, but '%s' has %d" % (
101-
self.output_file_grp, len(self.output_file_grp.split(',')))
103+
assert_file_grp_cardinality(self.input_file_grp, 1)
104+
assert_file_grp_cardinality(self.output_file_grp, 1)
102105

103106
for (n, input_file) in enumerate(self.input_files):
104107
LOG.info("INPUT FILE %i / %s", n, input_file.pageId or input_file.ID)
105-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
106-
if file_id == input_file.ID:
107-
file_id = concat_padded(self.output_file_grp, n)
108+
file_id = make_file_id(input_file, self.output_file_grp)
108109

109110
pcgts = page_from_file(self.workspace.download_file(input_file))
110111
page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID # (PageType has no id)
@@ -178,9 +179,6 @@ def process(self):
178179
comments=line_xywh['features'] + ',dewarped'))
179180

180181
# update METS (add the PAGE file):
181-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
182-
if file_id == input_file.ID:
183-
file_id = concat_padded(self.output_file_grp, n)
184182
file_path = os.path.join(self.output_file_grp, file_id + '.xml')
185183
out = self.workspace.add_file(
186184
ID=file_id,

ocrd_cis/ocropy/recognize.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
import Levenshtein
88

99
from ocrd_utils import (
10-
getLogger, concat_padded,
10+
getLogger,
11+
make_file_id,
12+
assert_file_grp_cardinality,
1113
coordinates_for_segment,
1214
polygon_from_bbox,
1315
points_from_polygon,
@@ -130,6 +132,10 @@ def process(self):
130132
131133
Produce a new output file by serialising the resulting hierarchy.
132134
"""
135+
136+
assert_file_grp_cardinality(self.input_file_grp, 1)
137+
assert_file_grp_cardinality(self.output_file_grp, 1)
138+
133139
# from ocropus-rpred:
134140
self.network = load_object(self.get_model(), verbose=1)
135141
for x in self.network.walk():
@@ -171,12 +177,8 @@ def process(self):
171177
self.process_regions(regions, maxlevel, page_image, page_coords)
172178

173179
# update METS (add the PAGE file):
174-
file_id = input_file.ID.replace(self.input_file_grp,
175-
self.output_file_grp)
176-
if file_id == input_file.ID:
177-
file_id = concat_padded(self.output_file_grp, n)
178-
file_path = os.path.join(self.output_file_grp,
179-
file_id + '.xml')
180+
file_id = make_file_id(input_file.ID, self.output_file_grp)
181+
file_path = os.path.join(self.output_file_grp, file_id + '.xml')
180182
out = self.workspace.add_file(
181183
ID=file_id,
182184
file_grp=self.output_file_grp,

ocrd_cis/ocropy/resegment.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
from ocrd import Processor
1717
from ocrd_utils import (
1818
getLogger,
19-
concat_padded,
19+
make_file_id,
20+
assert_file_grp_cardinality,
2021
coordinates_of_segment,
2122
coordinates_for_segment,
2223
bbox_from_polygon,
@@ -153,15 +154,12 @@ def process(self):
153154
# pixel density (at least if source input is not 300 DPI).
154155
threshold = self.parameter['min_fraction']
155156
margin = self.parameter['extend_margins']
156-
assert len(self.output_file_grp.split(',')) == 1, \
157-
"Expected exactly one output file group, but '%s' has %d" % (
158-
self.output_file_grp, len(self.output_file_grp.split(',')))
157+
assert_file_grp_cardinality(self.input_file_grp, 1)
158+
assert_file_grp_cardinality(self.output_file_grp, 1)
159159

160160
for (n, input_file) in enumerate(self.input_files):
161161
LOG.info("INPUT FILE %i / %s", n, input_file.pageId or input_file.ID)
162-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
163-
if file_id == input_file.ID:
164-
file_id = concat_padded(self.output_file_grp, n)
162+
file_id = make_file_id(input_file, self.output_file_grp)
165163

166164
pcgts = page_from_file(self.workspace.download_file(input_file))
167165
page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID # (PageType has no id)
@@ -277,9 +275,6 @@ def process(self):
277275
comments=region_xywh['features']))
278276

279277
# update METS (add the PAGE file):
280-
file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp)
281-
if file_id == input_file.ID:
282-
file_id = concat_padded(self.output_file_grp, n)
283278
file_path = os.path.join(self.output_file_grp, file_id + '.xml')
284279
out = self.workspace.add_file(
285280
ID=file_id,

ocrd_cis/ocropy/segment.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@
3333
from ocrd import Processor
3434
from ocrd_utils import (
3535
getLogger,
36-
concat_padded,
36+
make_file_id,
37+
assert_file_grp_cardinality,
3738
coordinates_of_segment,
3839
coordinates_for_segment,
3940
points_from_polygon,
@@ -204,16 +205,13 @@ def process(self):
204205
overwrite_separators = self.parameter['overwrite_separators']
205206
overwrite_order = self.parameter['overwrite_order']
206207
oplevel = self.parameter['level-of-operation']
207-
assert len(self.output_file_grp.split(',')) == 1, \
208-
"Expected exactly one output file group, but '%s' has %d" % (
209-
self.output_file_grp, len(self.output_file_grp.split(',')))
208+
209+
assert_file_grp_cardinality(self.input_file_grp, 1)
210+
assert_file_grp_cardinality(self.output_file_grp, 1)
210211

211212
for (n, input_file) in enumerate(self.input_files):
212213
LOG.info("INPUT FILE %i / %s", n, input_file.pageId or input_file.ID)
213-
file_id = input_file.ID.replace(self.input_file_grp,
214-
self.output_file_grp)
215-
if file_id == input_file.ID:
216-
file_id = concat_padded(self.output_file_grp, n)
214+
file_id = make_file_id(input_file, self.output_file_grp)
217215

218216
pcgts = page_from_file(self.workspace.download_file(input_file))
219217
page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID # (PageType has no id)

0 commit comments

Comments
 (0)