Skip to content

Commit e72bcec

Browse files
committed
add C.suffix_not_acronyms so we don't need to duplicate checks
1 parent ff73df5 commit e72bcec

File tree

6 files changed

+39
-29
lines changed

6 files changed

+39
-29
lines changed

docs/release_log.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
Release Log
22
===========
3-
* 0.3.9 - September 2, 2015
3+
* 0.3.9 - September 5, 2015
44
- Separate suffixes that are acronyms to handle periods differently, fixes #29, #21
55
- Don't find titles after first name is filled, fixes #27
6+
- Add "chair" titles (#37)
67
* 0.3.8 - September 2, 2015
78
- Use regex to check for roman numerals at end of name (#36)
89
- Add DVM to suffixes

nameparser/config/__init__.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from nameparser.config.conjunctions import CONJUNCTIONS
3838
from nameparser.config.suffixes import SUFFIXES
3939
from nameparser.config.suffixes import SUFFIX_ACRONYMS
40+
from nameparser.config.suffixes import SUFFIX_NOT_ACRONYMS
4041
from nameparser.config.titles import TITLES
4142
from nameparser.config.titles import FIRST_NAME_TITLES
4243
from nameparser.config.regexes import REGEXES
@@ -143,20 +144,22 @@ def __init__(self,
143144
prefixes=PREFIXES,
144145
suffixes=SUFFIXES,
145146
suffix_acronyms=SUFFIX_ACRONYMS,
147+
suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
146148
titles=TITLES,
147149
first_name_titles=FIRST_NAME_TITLES,
148150
conjunctions=CONJUNCTIONS,
149151
capitalization_exceptions=CAPITALIZATION_EXCEPTIONS,
150152
regexes=REGEXES
151153
):
152-
self.prefixes = SetManager(prefixes)
153-
self.suffixes = SetManager(suffixes)
154-
self.suffix_acronyms = SetManager(suffix_acronyms)
155-
self.titles = SetManager(titles)
156-
self.first_name_titles = SetManager(first_name_titles)
157-
self.conjunctions = SetManager(conjunctions)
154+
self.prefixes = SetManager(prefixes)
155+
self.suffixes = SetManager(suffixes)
156+
self.suffix_acronyms = SetManager(suffix_acronyms)
157+
self.suffix_not_acronyms = SetManager(suffix_not_acronyms)
158+
self.titles = SetManager(titles)
159+
self.first_name_titles = SetManager(first_name_titles)
160+
self.conjunctions = SetManager(conjunctions)
158161
self.capitalization_exceptions = TupleManager(capitalization_exceptions)
159-
self.regexes = TupleManager(regexes)
162+
self.regexes = TupleManager(regexes)
160163
self._pst = None
161164

162165
@property

nameparser/config/regexes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
("nickname", re.compile(r'\s*?[\("](.+?)[\)"]', re.U)),
1111
("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
1212
("no_vowels",re.compile(r'^[^aeyiuo]+$', re.I | re.U)),
13-
("period_not_at_end",re.compile(r'.*\..+$', re.I | re.U))
13+
("period_not_at_end",re.compile(r'.*\..+$', re.I | re.U)),
1414
])
1515
"""
1616
All regular expressions used by the parser are precompiled and stored in the config.

nameparser/config/suffixes.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import unicode_literals
33

4+
SUFFIX_NOT_ACRONYMS = set([
5+
'esq',
6+
'esquire',
7+
'jr',
8+
'jnr',
9+
'sr',
10+
'snr',
11+
'2',
12+
'i',
13+
'ii',
14+
'iii',
15+
'iv',
16+
'v',
17+
])
418
SUFFIX_ACRONYMS = set([
519
'ae',
620
'afc',
@@ -94,21 +108,7 @@
94108
'vd',
95109
'vrd',
96110
])
97-
98-
SUFFIXES = SUFFIX_ACRONYMS | set([
99-
'esq',
100-
'esquire',
101-
'jr',
102-
'jnr',
103-
'sr',
104-
'snr',
105-
'2',
106-
'i',
107-
'ii',
108-
'iii',
109-
'iv',
110-
'v',
111-
])
111+
SUFFIXES = SUFFIX_ACRONYMS | SUFFIX_NOT_ACRONYMS
112112
"""
113113
114114
Pieces that come at the end of the name but are not last names. These potentially

nameparser/parser.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,10 +279,16 @@ def is_roman_numeral(self, value):
279279
return bool(self.C.regexes.roman_numeral.match(value))
280280

281281
def is_suffix(self, piece):
282-
"""Is in the suffixes set and not :py:func:`is_an_initial()`."""
282+
"""
283+
Is in the suffixes set and not :py:func:`is_an_initial()`.
284+
285+
Some suffixes may be acronyms (M.B.A.) while some are not (Jr.),
286+
so we remove the periods from `piece` when testing against
287+
`C.suffix_acronyms`.
288+
"""
283289
# suffixes may have periods inside them like "M.D."
284290
return ((lc(piece).replace('.','') in self.C.suffix_acronyms) \
285-
or (lc(piece) in self.C.suffixes)) \
291+
or (lc(piece) in self.C.suffix_not_acronyms)) \
286292
and not self.is_an_initial(piece)
287293

288294
def are_suffixes(self, pieces):

tests.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -290,11 +290,11 @@ def test7(self):
290290
self.m(hn.middle, "A.", hn)
291291

292292
def test8(self):
293-
hn = HumanName("John A. Doe, Jr.")
293+
hn = HumanName("John A. Doe, Jr")
294294
self.m(hn.first, "John", hn)
295295
self.m(hn.last, "Doe", hn)
296296
self.m(hn.middle, "A.", hn)
297-
self.m(hn.suffix, "Jr.", hn)
297+
self.m(hn.suffix, "Jr", hn)
298298

299299
def test9(self):
300300
hn = HumanName("John A. Doe III")
@@ -1587,7 +1587,7 @@ def test_initials_also_suffix(self):
15871587
# self.m(hn.middle, "R.", hn)
15881588
self.m(hn.last, "Smith", hn)
15891589

1590-
def test_two_title_parts_separated_by_commas(self):
1590+
def test_two_title_parts_separated_by_periods(self):
15911591
hn = HumanName("Lt.Gen. John A. Kenneth Doe IV")
15921592
self.m(hn.title, "Lt.Gen.", hn)
15931593
self.m(hn.first, "John", hn)

0 commit comments

Comments
 (0)