Skip to content

Commit ff06ffc

Browse files
author
Jan-Justin
committed
Added exceptions to TextManager and updated docstrings accordingly.
1 parent bc1d129 commit ff06ffc

File tree

1 file changed

+33
-8
lines changed

1 file changed

+33
-8
lines changed

src/main/python/prototype/processing/text_manager.py

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def __init__(self, fuzzy_min_ratio=65):
121121
# Set the maximum number of lines for a multi line field in the id string to extract
122122
self._max_multi_line = 2
123123

124-
def clean_up(self, string, exclusions=None, append_to_exclusions=True):
124+
def clean_up(self, in_string, deplorables=None, append_to_deplorables=True):
125125
"""
126126
This function serves to receive an input string, clean it up by removing undesirable characters and
127127
unnecessary whitespace, and to return the cleaned string.
@@ -130,17 +130,31 @@ def clean_up(self, string, exclusions=None, append_to_exclusions=True):
130130
Jan-Justin van Tonder
131131
132132
Args:
133-
string (str): The input string that is to be cleaned.
134-
exclusions (list, Optional): A list of characters that are to be filtered from the input string.
135-
append_to_exclusions (bool, Optional): Indicates whether the list of exclusions should be appended to the
133+
in_string (str): The input string that is to be cleaned.
134+
deplorables (list, Optional): A list of characters that are to be filtered from the input string.
135+
append_to_deplorables (bool, Optional): Indicates whether the list of exclusions should be appended to the
136136
existing list of exclusions in the class if true, if false it will overwrite the existing list.
137137
138138
Returns:
139139
str: A string that has been stripped of undesirable characters and unnecessary whitespace.
140+
141+
Raises:
142+
TypeError: If in_string is not a string.
143+
TypeError: If deplorables is not a list of strings.
144+
TypeError: If append_to_deplorables is not a bool.
140145
"""
146+
# Check if the correct argument types have been passed in.
147+
if type(in_string) is not str:
148+
raise TypeError('Bad type for arg in_string - expected string. Received type ' + str(type(in_string)))
149+
if type(deplorables) is not list or (deplorables and type(deplorables[0]) is not str):
150+
raise TypeError('Bad type for arg deplorables - expected list of strings. Received type '
151+
+ str(type(deplorables)))
152+
if type(append_to_deplorables) is not bool:
153+
raise TypeError('Bad type for arg append_to_deplorables - expected list of strings. Received type '
154+
+ str(type(append_to_deplorables)))
141155
# Remove undesirable characters, spaces and newlines.
142-
compiled_deplorable_re = self._compile_deplorables(exclusions, append_to_exclusions)
143-
sanitised = re.sub(compiled_deplorable_re, '', string)
156+
compiled_deplorable_re = self._compile_deplorables(deplorables, append_to_deplorables)
157+
sanitised = re.sub(compiled_deplorable_re, '', in_string)
144158
# Remove empty lines in between text-filled lines.
145159
stripped_and_sanitised = re.sub(r'(\n\s*\n)', '\n', sanitised)
146160
# Remove multiple spaces before text-filled line.
@@ -158,8 +172,9 @@ def _compile_deplorables(self, deplorables, append_to_deplorables):
158172
159173
Args:
160174
deplorables (list): A list of characters that are to be filtered from the input string.
161-
append_to_deplorables (bool): Indicates whether the list of exclusions should be appended to the existing
162-
list of exclusions in the class if true, if false it will overwrite the existing list.
175+
append_to_deplorables (bool): Indicates whether the list of characters to be excluded should be appended to the
176+
existing list of characters to be excluded in the class if true, if false it will overwrite the
177+
existing list.
163178
164179
Returns:
165180
A compiled regex pattern used to match undesirable characters in a string.
@@ -221,7 +236,17 @@ def dictify(self, id_string, barcode_data=None):
221236
222237
Returns:
223238
(dict): A dictionary object containing the relevant, extracted ID information.
239+
240+
Raises:
241+
TypeError: If id_string is not a string.
242+
TypeError: If barcode_data is not a dictionary.
224243
"""
244+
# Check if arguments passed in are the correct type.
245+
if type(id_string) is not str:
246+
raise TypeError('Bad type for arg id_string - expected string. Received type ' + str(type(id_string)))
247+
if barcode_data and type(barcode_data) is not dict:
248+
raise TypeError('Bad type for arg id_string - expected dictionary. Received type '
249+
+ str(type(barcode_data)))
225250
# Given a string containing extracted ID text,
226251
# create a dictionary object and populate it with
227252
# relevant information from said text.

0 commit comments

Comments
 (0)