@@ -59,18 +59,18 @@ def __init__(self):
5959 'multi_line' : False
6060 }, {
6161 'field' : 'surname' ,
62- 'find' : ['surname' ],
62+ 'find' : ['surname' , 'vansurname' ],
6363 'field_type' : FieldType .TEXT_ONLY ,
6464 'to_uppercase' : False ,
6565 'multi_line' : True ,
66- 'multi_line_end' : ['names ' , 'fore names' ]
66+ 'multi_line_end' : ['forenames ' , 'names' , 'voornameforenames ' ]
6767 }, {
6868 'field' : 'names' ,
69- 'find' : ['names ' , 'fore names' ],
69+ 'find' : ['forenames ' , 'names' , 'voornameforenames ' ],
7070 'field_type' : FieldType .TEXT_ONLY ,
7171 'to_uppercase' : False ,
7272 'multi_line' : True ,
73- 'multi_line_end' : ['sex' , 'country of birth ' ]
73+ 'multi_line_end' : ['country of birth' , ' sex' , 'geboortedistrik of-land ' ]
7474 }, {
7575 'field' : 'sex' ,
7676 'find' : ['sex' ],
@@ -85,7 +85,7 @@ def __init__(self):
8585 'multi_line' : False
8686 }, {
8787 'field' : 'country_of_birth' ,
88- 'find' : ['country of birth' ],
88+ 'find' : ['country of birth' , 'district or country of birth' ],
8989 'field_type' : FieldType .TEXT_ONLY ,
9090 'to_uppercase' : True ,
9191 'multi_line' : False
@@ -258,9 +258,6 @@ def dictify(self, id_string, barcode_data=None, fuzzy_min_ratio=60.0, max_multi_
258258 # create a dictionary object and populate it with
259259 # relevant information from said text.
260260 id_info = {}
261- # Attempt to populate id_info.
262- logger .debug ('Extracting details from the given text string...' )
263- self ._populate_id_information (id_string , id_info , fuzzy_min_ratio , max_multi_line )
264261 # Check if barcode data, containing the id number, exists and
265262 # if so, save it and extract some relevant information from it.
266263 # It should overwrite any existing fields that can be extracted from the id number, since
@@ -269,6 +266,9 @@ def dictify(self, id_string, barcode_data=None, fuzzy_min_ratio=60.0, max_multi_
269266 logger .debug ('Extracting details from barcode data...' )
270267 id_info ['identity_number' ] = barcode_data ['identity_number' ]
271268 self ._id_number_information_extraction (id_info , barcode_data ['identity_number' ])
269+ # Attempt to populate id_info with information from the given ID string.
270+ logger .debug ('Extracting details from the given text string...' )
271+ self ._populate_id_information (id_string , id_info , fuzzy_min_ratio , max_multi_line )
272272 # Perform some custom post-processing on the information that was extracted.
273273 logger .debug ('Post-processing some field values...' )
274274 self ._post_process (id_info )
@@ -331,20 +331,17 @@ def _populate_id_information(self, id_string, id_info, fuzzy_min_ratio, max_mult
331331 id_string_list = id_string .split ('\n ' )
332332 # Attempt to retrieve matches.
333333 for match_context in self .match_contexts :
334- # Logging for debugging purposes.
335- logger .debug ('Searching for field value for "%s"' % match_context ['field' ])
336334 # Extract desired field name from context as key.
337335 key = match_context ['field' ]
338336 # Only retrieve information if it does not exist or it could not previously
339337 # be determined.
340- id_info [key ] = self ._get_match (id_string_list , match_context , fuzzy_min_ratio , max_multi_line )
341- # Logging for debugging purposes.
342- logger .debug ('%s value found for "%s"' % ('Field' if id_info [key ] else 'No field' , match_context ['field' ]))
343- # If the ID number has been retrieved, use it to extract other useful information.
344- # It should overwrite any existing fields that can be extracted from the id number, since
345- # the information embedded within the id number is more reliable, at least theoretically.
346- if id_info ['identity_number' ]:
347- self ._id_number_information_extraction (id_info , id_info ['identity_number' ])
338+ if key not in id_info or not id_info [key ]:
339+ id_info [key ] = self ._get_match (id_string_list , match_context , fuzzy_min_ratio , max_multi_line )
340+ # If the ID number has been retrieved, use it to extract other useful information.
341+ # It should overwrite any existing fields that can be extracted from the id number, since
342+ # the information embedded within the id number is more reliable, at least theoretically.
343+ if key == 'identity_number' and id_info [key ]:
344+ self ._id_number_information_extraction (id_info , id_info [key ])
348345
349346 @staticmethod
350347 def _get_match (id_string_list , match_context , fuzzy_min_ratio , max_multi_line ):
0 commit comments