Add C.suffix_not_acronyms so we don't need to duplicate checks

derek73 · derek73 · commit e72bcec851f5 · 2015-09-05T12:34:44.000-07:00
diff --git a/docs/release_log.rst b/docs/release_log.rst
@@ -1,8 +1,9 @@
 Release Log
 ===========
-* 0.3.9 - September 2, 2015
+* 0.3.9 - September 5, 2015
     - Separate suffixes that are acronyms to handle periods differently, fixes #29, #21
     - Don't find titles after first name is filled, fixes (#27)
+    - Add "chair" titles (#37)
 * 0.3.8 - September 2, 2015
     - Use regex to check for roman numerals at end of name (#36)
     - Add DVM to suffixes
diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py
@@ -37,6 +37,7 @@
 from nameparser.config.conjunctions import CONJUNCTIONS
 from nameparser.config.suffixes import SUFFIXES 
 from nameparser.config.suffixes import SUFFIX_ACRONYMS
+from nameparser.config.suffixes import SUFFIX_NOT_ACRONYMS
 from nameparser.config.titles import TITLES
 from nameparser.config.titles import FIRST_NAME_TITLES
 from nameparser.config.regexes import REGEXES
@@ -143,20 +144,22 @@ def __init__(self,
                     prefixes=PREFIXES, 
                     suffixes=SUFFIXES,
                     suffix_acronyms=SUFFIX_ACRONYMS,
+                    suffix_not_acronyms=SUFFIX_NOT_ACRONYMS,
                     titles=TITLES,
                     first_name_titles=FIRST_NAME_TITLES,
                     conjunctions=CONJUNCTIONS,
                     capitalization_exceptions=CAPITALIZATION_EXCEPTIONS,
                     regexes=REGEXES
                 ):
-        self.prefixes          = SetManager(prefixes)
-        self.suffixes          = SetManager(suffixes)
-        self.suffix_acronyms   = SetManager(suffix_acronyms)
-        self.titles            = SetManager(titles)
-        self.first_name_titles = SetManager(first_name_titles)
-        self.conjunctions      = SetManager(conjunctions)
+        self.prefixes            = SetManager(prefixes)
+        self.suffixes            = SetManager(suffixes)
+        self.suffix_acronyms     = SetManager(suffix_acronyms)
+        self.suffix_not_acronyms = SetManager(suffix_not_acronyms)
+        self.titles              = SetManager(titles)
+        self.first_name_titles   = SetManager(first_name_titles)
+        self.conjunctions        = SetManager(conjunctions)
         self.capitalization_exceptions = TupleManager(capitalization_exceptions)
-        self.regexes                = TupleManager(regexes)
+        self.regexes             = TupleManager(regexes)
         self._pst = None
     
     @property
diff --git a/nameparser/config/regexes.py b/nameparser/config/regexes.py
@@ -10,7 +10,7 @@
     ("nickname", re.compile(r'\s*?[\("](.+?)[\)"]', re.U)),
     ("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),
     ("no_vowels",re.compile(r'^[^aeyiuo]+$', re.I | re.U)),
-    ("period_not_at_end",re.compile(r'.*\..+$', re.I | re.U))
+    ("period_not_at_end",re.compile(r'.*\..+$', re.I | re.U)),
 ])
 """
 All regular expressions used by the parser are precompiled and stored in the config.
diff --git a/nameparser/config/suffixes.py b/nameparser/config/suffixes.py
@@ -1,6 +1,20 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
+SUFFIX_NOT_ACRONYMS = set([
+    'esq',
+    'esquire',
+    'jr',
+    'jnr',
+    'sr',
+    'snr',
+    '2',
+    'i',
+    'ii',
+    'iii',
+    'iv',
+    'v',
+])
 SUFFIX_ACRONYMS = set([
     'ae',
     'afc',
@@ -94,21 +108,7 @@
     'vd',
     'vrd',
 ])
-
-SUFFIXES = SUFFIX_ACRONYMS | set([
-    'esq',
-    'esquire',
-    'jr',
-    'jnr',
-    'sr',
-    'snr',
-    '2',
-    'i',
-    'ii',
-    'iii',
-    'iv',
-    'v',
-])
+SUFFIXES = SUFFIX_ACRONYMS | SUFFIX_NOT_ACRONYMS
 """
 
 Pieces that come at the end of the name but are not last names. These potentially
diff --git a/nameparser/parser.py b/nameparser/parser.py
@@ -279,10 +279,16 @@ def is_roman_numeral(self, value):
         return bool(self.C.regexes.roman_numeral.match(value))
     
     def is_suffix(self, piece):
-        """Is in the suffixes set and not :py:func:`is_an_initial()`."""
+        """
+        Is in the suffixes set and not :py:func:`is_an_initial()`.
+        
+        Some suffixes are acronyms (M.B.A.) while others are not (Jr.),
+        so periods are removed from `piece` only when testing against
+        `C.suffix_acronyms`, not when testing against `C.suffix_not_acronyms`.
+        """
         # suffixes may have periods inside them like "M.D."
         return ((lc(piece).replace('.','') in self.C.suffix_acronyms) \
-            or (lc(piece) in self.C.suffixes)) \
+            or (lc(piece) in self.C.suffix_not_acronyms)) \
             and not self.is_an_initial(piece)
     
     def are_suffixes(self, pieces):
diff --git a/tests.py b/tests.py
@@ -290,11 +290,11 @@ def test7(self):
         self.m(hn.middle, "A.", hn)
 
     def test8(self):
-        hn = HumanName("John A. Doe, Jr.")
+        hn = HumanName("John A. Doe, Jr")
         self.m(hn.first, "John", hn)
         self.m(hn.last, "Doe", hn)
         self.m(hn.middle, "A.", hn)
-        self.m(hn.suffix, "Jr.", hn)
+        self.m(hn.suffix, "Jr", hn)
 
     def test9(self):
         hn = HumanName("John A. Doe III")
@@ -1587,7 +1587,7 @@ def test_initials_also_suffix(self):
         # self.m(hn.middle, "R.", hn)
         self.m(hn.last, "Smith", hn)
 
-    def test_two_title_parts_separated_by_commas(self):
+    def test_two_title_parts_separated_by_periods(self):
         hn = HumanName("Lt.Gen. John A. Kenneth Doe IV")
         self.m(hn.title, "Lt.Gen.", hn)
         self.m(hn.first, "John", hn)