|
7 | 7 | # URL: <https://www.nltk.org/> |
8 | 8 | # For license information, see LICENSE.TXT |
9 | 9 |
|
10 | | -from nltk.corpus import wordnet as wn |
11 | | - |
12 | 10 |
|
13 | 11 | class WordNetLemmatizer: |
14 | 12 | """ |
15 | 13 | WordNet Lemmatizer |
16 | 14 |
|
17 | | - Provides 3 lemmatizer modes: |
18 | | -
|
19 | | - 1. _morphy() is an alias to WordNet's _morphy lemmatizer. |
20 | | - It returns a list of all lemmas found in WordNet. |
21 | | -
|
22 | | - >>> wnl = WordNetLemmatizer() |
23 | | - >>> print(wnl._morphy('us', 'n')) |
24 | | - ['us', 'u'] |
25 | | -
|
26 | | - 2. morphy() is a restrictive wrapper around _morphy(). |
27 | | - It returns the first lemma found in WordNet, |
28 | | - or None if no lemma is found. |
| 15 | + Provides 3 lemmatizer modes: _morphy(), morphy() and lemmatize(). |
29 | 16 |
|
30 | | - >>> print(wnl.morphy('us', 'n')) |
31 | | - us |
32 | | -
|
33 | | - >>> print(wnl.morphy('catss')) |
34 | | - None |
35 | | -
|
36 | | - 3. lemmatize() is a permissive wrapper around _morphy(). |
| 17 | + lemmatize() is a permissive wrapper around _morphy(). |
37 | 18 | It returns the shortest lemma found in WordNet, |
38 | 19 | or the input string unchanged if nothing is found. |
39 | 20 |
|
40 | | - >>> print(wnl.lemmatize('us', 'n')) |
| 21 | + >>> from nltk.stem import WordNetLemmatizer as wnl |
| 22 | + >>> print(wnl().lemmatize('us', 'n')) |
41 | 23 | u |
42 | 24 |
|
43 | | - >>> print(wnl.lemmatize('Anythinggoeszxcv')) |
| 25 | + >>> print(wnl().lemmatize('Anythinggoeszxcv')) |
44 | 26 | Anythinggoeszxcv |
45 | 27 |
|
46 | 28 | """ |
47 | 29 |
|
48 | | - morphy = wn.morphy |
| 30 | + def _morphy(self, form, pos, check_exceptions=True): |
| 31 | + """ |
| 32 | + _morphy() is WordNet's _morphy lemmatizer. |
| 33 | + It returns a list of all lemmas found in WordNet. |
| 34 | +
|
| 35 | + >>> from nltk.stem import WordNetLemmatizer as wnl |
| 36 | + >>> print(wnl()._morphy('us', 'n')) |
| 37 | + ['us', 'u'] |
| 38 | + """ |
| 39 | + from nltk.corpus import wordnet as wn |
| 40 | + |
| 41 | + return wn._morphy(form, pos, check_exceptions) |
| 42 | + |
| 43 | + def morphy(self, form, pos=None, check_exceptions=True): |
| 44 | + """ |
| 45 | + morphy() is a restrictive wrapper around _morphy(). |
| 46 | + It returns the first lemma found in WordNet, |
| 47 | + or None if no lemma is found. |
| 48 | +
|
| 49 | + >>> from nltk.stem import WordNetLemmatizer as wnl |
| 50 | + >>> print(wnl().morphy('us', 'n')) |
| 51 | + us |
| 52 | +
|
| 53 | + >>> print(wnl().morphy('catss')) |
| 54 | + None |
| 55 | + """ |
| 56 | + from nltk.corpus import wordnet as wn |
49 | 57 |
|
50 | | - _morphy = wn._morphy |
| 58 | + return wn.morphy(form, pos, check_exceptions) |
51 | 59 |
|
52 | 60 | def lemmatize(self, word: str, pos: str = "n") -> str: |
53 | 61 | """Lemmatize `word` by picking the shortest of the possible lemmas, |
54 | 62 | using the wordnet corpus reader's built-in _morphy function. |
55 | 63 | Returns the input word unchanged if it cannot be found in WordNet. |
56 | 64 |
|
57 | | - >>> from nltk.stem import WordNetLemmatizer |
58 | | - >>> wnl = WordNetLemmatizer() |
59 | | - >>> print(wnl.lemmatize('dogs')) |
| 65 | + >>> from nltk.stem import WordNetLemmatizer as wnl |
| 66 | + >>> print(wnl().lemmatize('dogs')) |
60 | 67 | dog |
61 | | - >>> print(wnl.lemmatize('churches')) |
| 68 | + >>> print(wnl().lemmatize('churches')) |
62 | 69 | church |
63 | | - >>> print(wnl.lemmatize('aardwolves')) |
| 70 | + >>> print(wnl().lemmatize('aardwolves')) |
64 | 71 | aardwolf |
65 | | - >>> print(wnl.lemmatize('abaci')) |
| 72 | + >>> print(wnl().lemmatize('abaci')) |
66 | 73 | abacus |
67 | | - >>> print(wnl.lemmatize('hardrock')) |
| 74 | + >>> print(wnl().lemmatize('hardrock')) |
68 | 75 | hardrock |
69 | 76 |
|
70 | 77 | :param word: The input word to lemmatize. |
|
0 commit comments