Skip to content

Commit 0f8fcaf

Browse files
committed
Don't crash if regexes are not defined
Test that all of the constants can be overridden
1 parent 1fce485 commit 0f8fcaf

File tree

2 files changed

+71
-11
lines changed

2 files changed

+71
-11
lines changed

nameparser/parser.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from __future__ import unicode_literals
33

44
import sys
5+
import re
56
from operator import itemgetter
67
from itertools import groupby
78

@@ -467,11 +468,14 @@ def post_process(self):
467468
self.handle_capitalization()
468469

469470
def fix_phd(self):
470-
_re = self.C.regexes.phd
471-
match = _re.search(self._full_name)
472-
if match:
473-
self.suffix_list.append(match.group(1))
474-
self._full_name = _re.sub('', self._full_name)
471+
try:
472+
_re = self.C.regexes.phd
473+
match = _re.search(self._full_name)
474+
if match:
475+
self.suffix_list.append(match.group(1))
476+
self._full_name = _re.sub('', self._full_name)
477+
except AttributeError:
478+
pass
475479

476480
def parse_nicknames(self):
477481
"""
@@ -485,10 +489,12 @@ def parse_nicknames(self):
485489
Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`;
486490
`quoted_word`, `double_quotes` and `parenthesis`.
487491
"""
488-
489-
re_quoted_word = self.C.regexes.quoted_word
490-
re_double_quotes = self.C.regexes.double_quotes
491-
re_parenthesis = self.C.regexes.parenthesis
492+
493+
empty_re = re.compile("")
494+
495+
re_quoted_word = self.C.regexes.quoted_word or empty_re
496+
re_double_quotes = self.C.regexes.double_quotes or empty_re
497+
re_parenthesis = self.C.regexes.parenthesis or empty_re
492498

493499
for _re in (re_quoted_word, re_double_quotes, re_parenthesis):
494500
if _re.search(self._full_name):
@@ -704,7 +710,7 @@ def parse_pieces(self, parts, additional_parts_count=0):
704710
# constants so they get parsed correctly later
705711
for part in output:
706712
# if this part has a period not at the beginning or end
707-
if self.C.regexes.period_not_at_end.match(part):
713+
if self.C.regexes.period_not_at_end and self.C.regexes.period_not_at_end.match(part):
708714
# split on periods, any of the split pieces titles or suffixes?
709715
# ("Lt.Gov.")
710716
period_chunks = part.split(".")

tests.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,15 @@
2020
"""
2121

2222
import logging
23+
import re
2324
try:
2425
import dill
2526
except ImportError:
2627
dill = False
2728

2829
from nameparser import HumanName
2930
from nameparser.util import u
30-
from nameparser.config import Constants
31+
from nameparser.config import Constants, TupleManager
3132

3233
log = logging.getLogger('HumanName')
3334

@@ -199,6 +200,59 @@ def test_surnames_attribute(self):
199200
hn = HumanName("John Edgar Casey Williams III")
200201
self.m(hn.surnames, "Edgar Casey Williams", hn)
201202

203+
def test_override_constants(self):
204+
C = Constants()
205+
hn = HumanName(constants=C)
206+
self.assertTrue(hn.C is C)
207+
208+
def test_override_regex(self):
209+
var = TupleManager([("spaces", re.compile(r"\s+", re.U)),])
210+
C = Constants(regexes=var)
211+
hn = HumanName(constants=C)
212+
self.assertTrue(hn.C.regexes == var)
213+
214+
def test_override_titles(self):
215+
var = ["abc","def"]
216+
C = Constants(titles=var)
217+
hn = HumanName(constants=C)
218+
self.assertTrue(sorted(hn.C.titles) == sorted(var))
219+
220+
def test_override_first_name_titles(self):
221+
var = ["abc","def"]
222+
C = Constants(first_name_titles=var)
223+
hn = HumanName(constants=C)
224+
self.assertTrue(sorted(hn.C.first_name_titles) == sorted(var))
225+
226+
def test_override_prefixes(self):
227+
var = ["abc","def"]
228+
C = Constants(prefixes=var)
229+
hn = HumanName(constants=C)
230+
self.assertTrue(sorted(hn.C.prefixes) == sorted(var))
231+
232+
def test_override_suffix_acronyms(self):
233+
var = ["abc","def"]
234+
C = Constants(suffix_acronyms=var)
235+
hn = HumanName(constants=C)
236+
self.assertTrue(sorted(hn.C.suffix_acronyms) == sorted(var))
237+
238+
def test_override_suffix_not_acronyms(self):
239+
var = ["abc","def"]
240+
C = Constants(suffix_not_acronyms=var)
241+
hn = HumanName(constants=C)
242+
self.assertTrue(sorted(hn.C.suffix_not_acronyms) == sorted(var))
243+
244+
def test_override_conjunctions(self):
245+
var = ["abc","def"]
246+
C = Constants(conjunctions=var)
247+
hn = HumanName(constants=C)
248+
self.assertTrue(sorted(hn.C.conjunctions) == sorted(var))
249+
250+
def test_override_capitalization_exceptions(self):
251+
var = TupleManager([("spaces", re.compile(r"\s+", re.U)),])
252+
C = Constants(capitalization_exceptions=var)
253+
hn = HumanName(constants=C)
254+
self.assertTrue(hn.C.capitalization_exceptions == var)
255+
202256

203257
class FirstNameHandlingTests(HumanNameTestBase):
204258
def test_first_name(self):

0 commit comments

Comments
 (0)