@@ -76,12 +76,16 @@ def __init__(self, *args, **kwargs):
76
76
kwargs ['ocrd_tool' ] = self .ocrd_tool ['tools' ][TOOL ]
77
77
kwargs ['version' ] = self .ocrd_tool ['version' ]
78
78
super (OcropyBinarize , self ).__init__ (* args , ** kwargs )
79
- LOG = getLogger ('processor.OcropyBinarize' )
80
79
if hasattr (self , 'output_file_grp' ):
81
- if self .parameter ['grayscale' ] and self .parameter ['method' ] != 'ocropy' :
82
- LOG .critical ('requested method %s does not support grayscale normalized output' ,
83
- self .parameter ['method' ])
84
- raise Exception ('only method=ocropy allows grayscale=true' )
80
+ # processing context
81
+ self .setup ()
82
+
83
+ def setup (self ):
84
+ self .logger = getLogger ('processor.OcropyBinarize' )
85
+ if self .parameter ['grayscale' ] and self .parameter ['method' ] != 'ocropy' :
86
+ self .logger .critical ('requested method %s does not support grayscale normalized output' ,
87
+ self .parameter ['method' ])
88
+ raise Exception ('only method=ocropy allows grayscale=true' )
85
89
86
90
def process (self ):
87
91
"""Binarize (and optionally deskew/despeckle) the pages/regions/lines of the workspace.
@@ -105,13 +109,12 @@ def process(self):
105
109
106
110
Produce a new output file by serialising the resulting hierarchy.
107
111
"""
108
- LOG = getLogger ('processor.OcropyBinarize' )
109
112
level = self .parameter ['level-of-operation' ]
110
113
assert_file_grp_cardinality (self .input_file_grp , 1 )
111
114
assert_file_grp_cardinality (self .output_file_grp , 1 )
112
115
113
116
for (n , input_file ) in enumerate (self .input_files ):
114
- LOG .info ("INPUT FILE %i / %s" , n , input_file .pageId or input_file .ID )
117
+ self . logger .info ("INPUT FILE %i / %s" , n , input_file .pageId or input_file .ID )
115
118
file_id = make_file_id (input_file , self .output_file_grp )
116
119
117
120
pcgts = page_from_file (self .workspace .download_file (input_file ))
@@ -139,7 +142,7 @@ def process(self):
139
142
dpi = page_image_info .resolution
140
143
if page_image_info .resolutionUnit == 'cm' :
141
144
dpi *= 2.54
142
- LOG .info ('Page "%s" uses %f DPI' , page_id , dpi )
145
+ self . logger .info ('Page "%s" uses %f DPI' , page_id , dpi )
143
146
zoom = 300.0 / dpi
144
147
else :
145
148
zoom = 1
@@ -151,7 +154,7 @@ def process(self):
151
154
regions = page .get_TextRegion () + (
152
155
page .get_TableRegion () if level == 'region' else [])
153
156
if not regions :
154
- LOG .warning ('Page "%s" contains no text regions' , page_id )
157
+ self . logger .warning ('Page "%s" contains no text regions' , page_id )
155
158
for region in regions :
156
159
region_image , region_xywh = self .workspace .image_from_segment (
157
160
region , page_image , page_xywh , feature_filter = 'binarized' )
@@ -161,7 +164,8 @@ def process(self):
161
164
continue
162
165
lines = region .get_TextLine ()
163
166
if not lines :
164
- LOG .warning ('Page "%s" region "%s" contains no text lines' , page_id , region .id )
167
+ self .logger .warning ('Page "%s" region "%s" contains no text lines' ,
168
+ page_id , region .id )
165
169
for line in lines :
166
170
line_image , line_xywh = self .workspace .image_from_segment (
167
171
line , region_image , region_xywh , feature_filter = 'binarized' )
@@ -179,12 +183,11 @@ def process(self):
179
183
local_filename = file_path ,
180
184
mimetype = MIMETYPE_PAGE ,
181
185
content = to_xml (pcgts ))
182
- LOG .info ('created file ID: %s, file_grp: %s, path: %s' ,
183
- file_id , self .output_file_grp , out .local_filename )
186
+ self . logger .info ('created file ID: %s, file_grp: %s, path: %s' ,
187
+ file_id , self .output_file_grp , out .local_filename )
184
188
185
189
def process_page (self , page , page_image , page_xywh , zoom , page_id , file_id ):
186
- LOG = getLogger ('processor.OcropyBinarize' )
187
- LOG .info ("About to binarize page '%s'" , page_id )
190
+ self .logger .info ("About to binarize page '%s'" , page_id )
188
191
features = page_xywh ['features' ]
189
192
if 'angle' in page_xywh and page_xywh ['angle' ]:
190
193
# orientation has already been annotated (by previous deskewing),
@@ -229,8 +232,7 @@ def process_page(self, page, page_image, page_xywh, zoom, page_id, file_id):
229
232
comments = features ))
230
233
231
234
def process_region (self , region , region_image , region_xywh , zoom , page_id , file_id ):
232
- LOG = getLogger ('processor.OcropyBinarize' )
233
- LOG .info ("About to binarize page '%s' region '%s'" , page_id , region .id )
235
+ self .logger .info ("About to binarize page '%s' region '%s'" , page_id , region .id )
234
236
features = region_xywh ['features' ]
235
237
if 'angle' in region_xywh and region_xywh ['angle' ]:
236
238
# orientation has already been annotated (by previous deskewing),
@@ -277,9 +279,8 @@ def process_region(self, region, region_image, region_xywh, zoom, page_id, file_
277
279
comments = features ))
278
280
279
281
def process_line (self , line , line_image , line_xywh , zoom , page_id , region_id , file_id ):
280
- LOG = getLogger ('processor.OcropyBinarize' )
281
- LOG .info ("About to binarize page '%s' region '%s' line '%s'" ,
282
- page_id , region_id , line .id )
282
+ self .logger .info ("About to binarize page '%s' region '%s' line '%s'" ,
283
+ page_id , region_id , line .id )
283
284
features = line_xywh ['features' ]
284
285
bin_image , angle = binarize (line_image ,
285
286
method = self .parameter ['method' ],
@@ -294,8 +295,8 @@ def process_line(self, line, line_image, line_xywh, zoom, page_id, region_id, fi
294
295
#orientation = -angle
295
296
#orientation = 180 - (180 - orientation) % 360 # map to [-179.999,180]
296
297
#line.set_orientation(orientation) # does not exist on line level!
297
- LOG .warning ("cannot add orientation %.2f to page '%s' region '%s' line '%s'" ,
298
- - angle , page_id , region_id , line .id )
298
+ self . logger .warning ("cannot add orientation %.2f to page '%s' region '%s' line '%s'" ,
299
+ - angle , page_id , region_id , line .id )
299
300
bin_image = remove_noise (bin_image ,
300
301
maxsize = self .parameter ['noise_maxsize' ])
301
302
if self .parameter ['noise_maxsize' ]:
0 commit comments