
Commit 980f435

Merge branch 'develop' of https://github.com/nltk/nltk into develop
2 parents 24936a2 + 04d6a55

4 files changed (+64 -48 lines)


nltk/corpus/reader/wordnet.py

Lines changed: 16 additions & 36 deletions
@@ -1421,6 +1421,7 @@ def _next_token():
                     # map lemmas and parts of speech to synsets
                     self._lemma_pos_offset_map[lemma][pos] = synset_offsets
                     if pos == ADJ:
+                        # Duplicate all adjectives indiscriminately?:
                         self._lemma_pos_offset_map[lemma][ADJ_SAT] = synset_offsets

     def _load_exception_map(self):
@@ -2016,8 +2017,9 @@ def morphy(self, form, pos=None, check_exceptions=True):
         """
         Find a possible base form for the given form, with the given
         part of speech, by checking WordNet's list of exceptional
-        forms, and by recursively stripping affixes for this part of
-        speech until a form in WordNet is found.
+        forms, or by substituting suffixes for this part of speech.
+        If pos=None, try every part of speech until finding lemmas.
+        Return the first form found in WordNet, or eventually None.

         >>> from nltk.corpus import wordnet as wn
         >>> print(wn.morphy('dogs'))
@@ -2033,19 +2035,11 @@ def morphy(self, form, pos=None, check_exceptions=True):
         book
         >>> wn.morphy('book', wn.ADJ)
         """
-
-        if pos is None:
-            morphy = self._morphy
-            analyses = chain(a for p in POS_LIST for a in morphy(form, p))
-        else:
+        for pos in [pos] if pos else POS_LIST:
             analyses = self._morphy(form, pos, check_exceptions)
-
-        # get the first one we find
-        first = list(islice(analyses, 1))
-        if len(first) == 1:
-            return first[0]
-        else:
-            return None
+            if analyses:
+                # Stop (don't try more parts of speech):
+                return analyses[0]

     MORPHOLOGICAL_SUBSTITUTIONS = {
         NOUN: [
@@ -2080,8 +2074,7 @@ def _morphy(self, form, pos, check_exceptions=True):
         # Given an original string x
         # 1. Apply rules once to the input to get y1, y2, y3, etc.
         # 2. Return all that are in the database
-        # 3. If there are no matches, keep applying rules until you either
-        #    find a match or you can't go any further
+        # (edited by ekaf) If there are no matches return an empty list.

         exceptions = self._exception_map[pos]
         substitutions = self.MORPHOLOGICAL_SUBSTITUTIONS[pos]
@@ -2105,28 +2098,15 @@ def filter_forms(forms):
                             seen.add(form)
             return result

-        # 0. Check the exception lists
-        if check_exceptions:
-            if form in exceptions:
-                return filter_forms([form] + exceptions[form])
-
-        # 1. Apply rules once to the input to get y1, y2, y3, etc.
-        forms = apply_rules([form])
+        if check_exceptions and form in exceptions:
+            # 0. Check the exception lists
+            forms = exceptions[form]
+        else:
+            # 1. Apply rules once to the input to get y1, y2, y3, etc.
+            forms = apply_rules([form])

         # 2. Return all that are in the database (and check the original too)
-        results = filter_forms([form] + forms)
-        if results:
-            return results
-
-        # 3. If there are no matches, keep applying rules until we find a match
-        while forms:
-            forms = apply_rules(forms)
-            results = filter_forms(forms)
-            if results:
-                return results
-
-        # Return an empty list if we can't find anything
-        return []
+        return filter_forms([form] + forms)

     #############################################################
     # Create information content from corpus
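
For orientation, a short usage sketch of the rewritten morphy(): it now tries each requested part of speech exactly once and returns the first lemma that _morphy() yields, or None. The expected outputs below follow the doctests shown in this diff and assume the WordNet corpus data is installed.

    from nltk.corpus import wordnet as wn

    # One pass per part of speech; rules are no longer applied repeatedly.
    print(wn.morphy('dogs'))           # dog
    print(wn.morphy('book', wn.NOUN))  # book
    print(wn.morphy('book', wn.ADJ))   # None: 'book' has no adjectival reading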

nltk/downloader.py

Lines changed: 1 addition & 1 deletion
@@ -1270,7 +1270,7 @@ def _simple_interactive_config(self):
                     print(" Cancelled!")
                 else:
                     if not new_url.startswith(("http://", "https://")):
-                        new_url = "http://" + new_url
+                        new_url = "https://" + new_url
                     try:
                         self._ds.url = new_url
                     except Exception as e:
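
The effect of this one-line change, shown as a standalone sketch (the helper name is illustrative, not part of the downloader API): a server index URL entered without a scheme in the downloader's interactive configuration is now completed with "https://" rather than "http://".

    def normalize_index_url(new_url: str) -> str:
        # Mirrors the check in _simple_interactive_config().
        if not new_url.startswith(("http://", "https://")):
            new_url = "https://" + new_url
        return new_url

    print(normalize_index_url("example.org/nltk_data/index.xml"))
    # https://example.org/nltk_data/index.xml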

nltk/parse/generate.py

Lines changed: 5 additions & 2 deletions
@@ -3,6 +3,7 @@
 # Copyright (C) 2001-2024 NLTK Project
 # Author: Steven Bird <[email protected]>
 #         Peter Ljunglöf <[email protected]>
+#         Eric Kafe <[email protected]>
 # URL: <https://www.nltk.org/>
 # For license information, see LICENSE.TXT
 #
@@ -26,7 +27,8 @@ def generate(grammar, start=None, depth=None, n=None):
     if not start:
         start = grammar.start()
     if depth is None:
-        depth = sys.maxsize
+        # Safe default, assuming the grammar may be recursive:
+        depth = (sys.getrecursionlimit() // 3) - 3

     iter = _generate_all(grammar, [start], depth)

@@ -45,7 +47,8 @@ def _generate_all(grammar, items, depth):
         except RecursionError as error:
             # Helpful error message while still showing the recursion stack.
             raise RuntimeError(
-                "The grammar has rule(s) that yield infinite recursion!"
+                "The grammar has rule(s) that yield infinite recursion!\n\
+Eventually use a lower 'depth', or a higher 'sys.setrecursionlimit()'."
             ) from error
     else:
         yield []
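
A minimal usage sketch of generate() under the new default; the toy grammar and the item count are illustrative, not taken from the diff. With depth=None, the derivation depth is now capped at (sys.getrecursionlimit() // 3) - 3 instead of sys.maxsize, so generation from a recursive grammar stays bounded instead of overflowing the stack.

    from nltk import CFG
    from nltk.parse.generate import generate

    grammar = CFG.fromstring("""
        S -> NP VP
        NP -> Det N
        VP -> V NP
        Det -> 'the'
        N -> 'dog' | 'cat'
        V -> 'chased'
    """)

    # n limits how many sentences are drawn from the generator.
    for sentence in generate(grammar, n=4):
        print(" ".join(sentence))
    # the dog chased the dog
    # the dog chased the cat
    # the cat chased the dog
    # the cat chased the cat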

nltk/stem/wordnet.py

Lines changed: 42 additions & 9 deletions
@@ -3,6 +3,7 @@
 # Copyright (C) 2001-2024 NLTK Project
 # Author: Steven Bird <[email protected]>
 #         Edward Loper <[email protected]>
+#         Eric Kafe <[email protected]>
 # URL: <https://www.nltk.org/>
 # For license information, see LICENSE.TXT

@@ -13,8 +14,45 @@ class WordNetLemmatizer:
     """
     WordNet Lemmatizer

-    Lemmatize using WordNet's built-in morphy function.
-    Returns the input word unchanged if it cannot be found in WordNet.
+    Provides 3 lemmatizer modes:
+
+    1. _morphy() is an alias to WordNet's _morphy lemmatizer.
+    It returns a list of all lemmas found in WordNet.
+
+    >>> wnl = WordNetLemmatizer()
+    >>> print(wnl._morphy('us', 'n'))
+    ['us', 'u']
+
+    2. morphy() is a restrictive wrapper around _morphy().
+    It returns the first lemma found in WordNet,
+    or None if no lemma is found.
+
+    >>> print(wnl.morphy('us', 'n'))
+    us
+
+    >>> print(wnl.morphy('catss'))
+    None
+
+    3. lemmatize() is a permissive wrapper around _morphy().
+    It returns the shortest lemma found in WordNet,
+    or the input string unchanged if nothing is found.
+
+    >>> print(wnl.lemmatize('us', 'n'))
+    u
+
+    >>> print(wnl.lemmatize('Anythinggoeszxcv'))
+    Anythinggoeszxcv
+
+    """
+
+    morphy = wn.morphy
+
+    _morphy = wn._morphy
+
+    def lemmatize(self, word: str, pos: str = "n") -> str:
+        """Lemmatize `word` by picking the shortest of the possible lemmas,
+        using the wordnet corpus reader's built-in _morphy function.
+        Returns the input word unchanged if it cannot be found in WordNet.

     >>> from nltk.stem import WordNetLemmatizer
     >>> wnl = WordNetLemmatizer()
@@ -28,21 +66,16 @@ class WordNetLemmatizer:
     abacus
     >>> print(wnl.lemmatize('hardrock'))
     hardrock
-    """
-
-    def lemmatize(self, word: str, pos: str = "n") -> str:
-        """Lemmatize `word` using WordNet's built-in morphy function.
-        Returns the input word unchanged if it cannot be found in WordNet.

         :param word: The input word to lemmatize.
         :type word: str
         :param pos: The Part Of Speech tag. Valid options are `"n"` for nouns,
             `"v"` for verbs, `"a"` for adjectives, `"r"` for adverbs and `"s"`
             for satellite adjectives.
         :type pos: str
-        :return: The lemma of `word`, for the given `pos`.
+        :return: The shortest lemma of `word`, for the given `pos`.
         """
-        lemmas = wn._morphy(word, pos)
+        lemmas = self._morphy(word, pos)
         return min(lemmas, key=len) if lemmas else word

     def __repr__(self):
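
The three modes documented above can be compared side by side; the expected outputs are exactly those given in the new docstring and assume the WordNet corpus data is installed.

    from nltk.stem import WordNetLemmatizer

    wnl = WordNetLemmatizer()

    # _morphy(): every lemma WordNet knows for the form/POS pair.
    print(wnl._morphy('us', 'n'))             # ['us', 'u']

    # morphy(): the first lemma found, or None.
    print(wnl.morphy('us', 'n'))              # us
    print(wnl.morphy('catss'))                # None

    # lemmatize(): the shortest lemma found, or the input unchanged.
    print(wnl.lemmatize('us', 'n'))           # u
    print(wnl.lemmatize('Anythinggoeszxcv'))  # Anythinggoeszxcv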
