15
15
)
16
16
from ocrd_modelfactory import page_from_file
17
17
from ocrd_models .ocrd_page import (
18
- MetadataItemType ,
19
- LabelsType , LabelType ,
20
18
to_xml , AlternativeImageType
21
19
)
22
20
from ocrd import Processor
@@ -118,21 +116,9 @@ def process(self):
118
116
file_id = make_file_id (input_file , self .output_file_grp )
119
117
120
118
pcgts = page_from_file (self .workspace .download_file (input_file ))
119
+ self .add_metadata (pcgts )
121
120
page_id = pcgts .pcGtsId or input_file .pageId or input_file .ID # (PageType has no id)
122
121
page = pcgts .get_Page ()
123
-
124
- # add metadata about this operation and its runtime parameters:
125
- metadata = pcgts .get_Metadata () # ensured by from_file()
126
- metadata .add_MetadataItem (
127
- MetadataItemType (type_ = "processingStep" ,
128
- name = self .ocrd_tool ['steps' ][0 ],
129
- value = TOOL ,
130
- Labels = [LabelsType (
131
- externalModel = "ocrd-tool" ,
132
- externalId = "parameters" ,
133
- Label = [LabelType (type_ = name ,
134
- value = self .parameter [name ])
135
- for name in self .parameter .keys ()])]))
136
122
137
123
page_image , page_xywh , page_image_info = self .workspace .image_from_page (
138
124
page , page_id , feature_filter = 'binarized' )
@@ -151,8 +137,10 @@ def process(self):
151
137
self .process_page (page , page_image , page_xywh , zoom ,
152
138
input_file .pageId , file_id )
153
139
else :
154
- regions = page .get_TextRegion () + (
155
- page .get_TableRegion () if level == 'region' else [])
140
+ if level == 'table' :
141
+ regions = page .get_TableRegion ()
142
+ else : # region
143
+ regions = page .get_AllRegions (classes = ['Text' ])
156
144
if not regions :
157
145
self .logger .warning ('Page "%s" contains no text regions' , page_id )
158
146
for region in regions :
0 commit comments