@@ -40,17 +40,25 @@ def main():
40
40
def stemmer(word):
    """Return leading consonants (if any), and 'stem' of word.

    The word is lower-cased first. The "stem" starts at the first vowel
    that follows an optional run of leading consonants and extends to the
    end of the word.

    Args:
        word: The string to split.

    Returns:
        A ``(leading, stem)`` tuple of lower-case strings. When the word
        does not begin with optional consonants followed by a vowel (e.g.
        it has no vowel, or starts with a digit), the whole lower-cased
        word is returned as ``leading`` and ``stem`` is ``''``.
    """
    word = word.lower()
    vowels = 'aeiou'
    consonants = ''.join(
        c for c in string.ascii_lowercase if c not in vowels)

    # Group 1: optional run of leading consonants.
    # Group 2: the first vowel plus everything after it (the stem).
    pattern = f'([{consonants}]+)?([{vowels}].*)'

    match = re.match(pattern, word)
    if match is None:
        # No vowel reachable from the start: nothing to rhyme with.
        return (word, '')

    # Group 1 is None when the word starts with a vowel; group 2 always
    # participates in a successful match, so it needs no fallback.
    return (match.group(1) or '', match.group(2))
54
62
55
63
56
64
# --------------------------------------------------
@@ -62,6 +70,7 @@ def test_stemmer():
62
70
assert stemmer ('chair' ) == ('ch' , 'air' )
63
71
assert stemmer ('APPLE' ) == ('' , 'apple' )
64
72
assert stemmer ('RDNZL' ) == ('rdnzl' , '' )
73
+ assert stemmer ('123' ) == ('123' , '' )
65
74
66
75
67
76
# --------------------------------------------------
0 commit comments