Skip to content

Commit 96ee715

Browse files
committed
Merge branch 'develop' of https://github.com/nltk/nltk into develop
2 parents d3fde3a + fa81e34 commit 96ee715

File tree

3 files changed

+26
-2
lines changed

3 files changed

+26
-2
lines changed

AUTHORS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@
302302
- Akihiro Yamazaki <https://github.com/zakkie>
303303
- Ron Urbach <https://github.com/sharpblade4>
304304
- Vivek Kalyan <https://github.com/vivekkalyan>
305+
- Tom Strange <https://github.com/strangetom>
305306

306307
## Others whose work we've taken and included in NLTK, but who didn't directly contribute it:
307308

nltk/data.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -686,9 +686,15 @@ def switch_chunker(fmt="multiclass"):
686686
"""
687687
Return a pickle-free Named Entity Chunker instead of loading a pickle.
688688
689+
>>> import nltk
690+
>>> from nltk.corpus import treebank
691+
>>> from pprint import pprint
692+
>>> chunker = nltk.data.load('chunkers/maxent_ne_chunker/PY3/english_ace_multiclass.pickle')
693+
>>> pprint(chunker.parse(treebank.tagged_sents()[2][8:14])) # doctest: +NORMALIZE_WHITESPACE
694+
Tree('S', [('chairman', 'NN'), ('of', 'IN'), Tree('ORGANIZATION', [('Consolidated', 'NNP'), ('Gold', 'NNP'), ('Fields', 'NNP')]), ('PLC', 'NNP')])
689695
690696
"""
691-
from nltk.chunker import ne_chunker
697+
from nltk.chunk import ne_chunker
692698

693699
return ne_chunker(fmt)
694700

@@ -697,8 +703,14 @@ def switch_t_tagger():
697703
"""
698704
Return a pickle-free Treebank POS Tagger instead of loading a pickle.
699705
706+
>>> import nltk
707+
>>> from nltk.tokenize import word_tokenize
708+
>>> tagger = nltk.data.load('taggers/maxent_treebank_pos_tagger/PY3/english.pickle')
709+
>>> print(tagger.tag(word_tokenize("Hello, how are you?")))
710+
[('Hello', 'NNP'), (',', ','), ('how', 'WRB'), ('are', 'VBP'), ('you', 'PRP'), ('?', '.')]
711+
700712
"""
701-
from nltk.classifier.maxent import maxent_pos_tagger
713+
from nltk.classify.maxent import maxent_pos_tagger
702714

703715
return maxent_pos_tagger()
704716

@@ -707,6 +719,12 @@ def switch_p_tagger(lang):
707719
"""
708720
Return a pickle-free Averaged Perceptron Tagger instead of loading a pickle.
709721
722+
>>> import nltk
723+
>>> from nltk.tokenize import word_tokenize
724+
>>> tagger = nltk.data.load('taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle')
725+
>>> print(tagger.tag(word_tokenize("Hello, how are you?")))
726+
[('Hello', 'NNP'), (',', ','), ('how', 'WRB'), ('are', 'VBP'), ('you', 'PRP'), ('?', '.')]
727+
710728
"""
711729
from nltk.tag import _get_tagger
712730

@@ -804,6 +822,8 @@ def load(
804822
protocol, path_ = split_resource_url(resource_url)
805823

806824
if path_[-7:] == ".pickle":
825+
if verbose:
826+
print(f"<<Loading pickle-free alternative to {resource_url}>>")
807827
fil = os.path.split(path_[:-7])[-1]
808828
if path_.startswith("tokenizers/punkt"):
809829
return switch_punkt(fil)

nltk/tag/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@
6565
isort:skip_file
6666
"""
6767

68+
import functools
69+
6870
from nltk.tag.api import TaggerI
6971
from nltk.tag.util import str2tuple, tuple2str, untag
7072
from nltk.tag.sequential import (
@@ -100,6 +102,7 @@
100102
}
101103

102104

105+
@functools.lru_cache
103106
def _get_tagger(lang=None):
104107
if lang == "rus":
105108
tagger = PerceptronTagger(lang=lang)

0 commit comments

Comments
 (0)