Skip to content

Commit a625827

Browse files
authored
Merge pull request #165 from javaTheHutts/162_tweak_text_manager
162 tweak text manager closes #162
2 parents 26c7cc3 + 057c895 commit a625827

File tree

1 file changed

+15
-18
lines changed

1 file changed

+15
-18
lines changed

src/main/python/image_processing/text_manager.py

Lines changed: 15 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -59,18 +59,18 @@ def __init__(self):
5959
'multi_line': False
6060
}, {
6161
'field': 'surname',
62-
'find': ['surname'],
62+
'find': ['surname', 'vansurname'],
6363
'field_type': FieldType.TEXT_ONLY,
6464
'to_uppercase': False,
6565
'multi_line': True,
66-
'multi_line_end': ['names', 'fore names']
66+
'multi_line_end': ['forenames', 'names', 'voornameforenames']
6767
}, {
6868
'field': 'names',
69-
'find': ['names', 'fore names'],
69+
'find': ['forenames', 'names', 'voornameforenames'],
7070
'field_type': FieldType.TEXT_ONLY,
7171
'to_uppercase': False,
7272
'multi_line': True,
73-
'multi_line_end': ['sex', 'country of birth']
73+
'multi_line_end': ['country of birth', 'sex', 'geboortedistrik of-land']
7474
}, {
7575
'field': 'sex',
7676
'find': ['sex'],
@@ -85,7 +85,7 @@ def __init__(self):
8585
'multi_line': False
8686
}, {
8787
'field': 'country_of_birth',
88-
'find': ['country of birth'],
88+
'find': ['country of birth', 'district or country of birth'],
8989
'field_type': FieldType.TEXT_ONLY,
9090
'to_uppercase': True,
9191
'multi_line': False
@@ -258,9 +258,6 @@ def dictify(self, id_string, barcode_data=None, fuzzy_min_ratio=60.0, max_multi_
258258
# create a dictionary object and populate it with
259259
# relevant information from said text.
260260
id_info = {}
261-
# Attempt to populate id_info.
262-
logger.debug('Extracting details from the given text string...')
263-
self._populate_id_information(id_string, id_info, fuzzy_min_ratio, max_multi_line)
264261
# Check if barcode data, containing the id number, exists and
265262
# if so, save it and extract some relevant information from it.
266263
# It should overwrite any existing fields that can be extracted from the id number, since
@@ -269,6 +266,9 @@ def dictify(self, id_string, barcode_data=None, fuzzy_min_ratio=60.0, max_multi_
269266
logger.debug('Extracting details from barcode data...')
270267
id_info['identity_number'] = barcode_data['identity_number']
271268
self._id_number_information_extraction(id_info, barcode_data['identity_number'])
269+
# Attempt to populate id_info with information from the given ID string.
270+
logger.debug('Extracting details from the given text string...')
271+
self._populate_id_information(id_string, id_info, fuzzy_min_ratio, max_multi_line)
272272
# Perform some custom post-processing on the information that was extracted.
273273
logger.debug('Post-processing some field values...')
274274
self._post_process(id_info)
@@ -331,20 +331,17 @@ def _populate_id_information(self, id_string, id_info, fuzzy_min_ratio, max_mult
331331
id_string_list = id_string.split('\n')
332332
# Attempt to retrieve matches.
333333
for match_context in self.match_contexts:
334-
# Logging for debugging purposes.
335-
logger.debug('Searching for field value for "%s"' % match_context['field'])
336334
# Extract desired field name from context as key.
337335
key = match_context['field']
338336
# Only retrieve information if it does not exist or it could not previously
339337
# be determined.
340-
id_info[key] = self._get_match(id_string_list, match_context, fuzzy_min_ratio, max_multi_line)
341-
# Logging for debugging purposes.
342-
logger.debug('%s value found for "%s"' % ('Field' if id_info[key] else 'No field', match_context['field']))
343-
# If the ID number has been retrieved, use it to extract other useful information.
344-
# It should overwrite any existing fields that can be extracted from the id number, since
345-
# the information embedded within the id number is more reliable, at least theoretically.
346-
if id_info['identity_number']:
347-
self._id_number_information_extraction(id_info, id_info['identity_number'])
338+
if key not in id_info or not id_info[key]:
339+
id_info[key] = self._get_match(id_string_list, match_context, fuzzy_min_ratio, max_multi_line)
340+
# If the ID number has been retrieved, use it to extract other useful information.
341+
# It should overwrite any existing fields that can be extracted from the id number, since
342+
# the information embedded within the id number is more reliable, at least theoretically.
343+
if key == 'identity_number' and id_info[key]:
344+
self._id_number_information_extraction(id_info, id_info[key])
348345

349346
@staticmethod
350347
def _get_match(id_string_list, match_context, fuzzy_min_ratio, max_multi_line):

0 commit comments

Comments
 (0)