Skip to content

Commit 2bd4e86

Browse files
committed
Handle all old pickles
1 parent 64c0afc commit 2bd4e86

File tree

1 file changed

+42
-0
lines changed

1 file changed

+42
-0
lines changed

nltk/data.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# Copyright (C) 2001-2024 NLTK Project
44
# Author: Edward Loper <[email protected]>
5+
# Author: ekaf (Restricting and switching pickles)
56
# URL: <https://www.nltk.org/>
67
# For license information, see LICENSE.TXT
78

@@ -681,6 +682,41 @@ def switch_punkt(lang="english"):
681682
return tok(lang)
682683

683684

685+
def switch_chunker(fmt="multiclass"):
    """
    Return a pickle-free Named Entity Chunker instead of loading a pickle.

    :param fmt: Chunker variant, passed unchanged to ``ne_chunker``.
        When called from ``load``, this is the last underscore-separated
        component of the requested pickle's file name.
    :return: Whatever ``nltk.chunk.ne_chunker(fmt)`` returns.
    """
    # NLTK's chunking package is ``nltk.chunk``; there is no ``nltk.chunker``
    # module, so importing from it would raise ModuleNotFoundError.
    # NOTE(review): the import is kept function-local, presumably to avoid a
    # circular import with nltk.data -- confirm before hoisting it.
    from nltk.chunk import ne_chunker

    return ne_chunker(fmt)
694+
695+
696+
def switch_t_tagger():
    """
    Return a pickle-free Treebank Pos Tagger instead of loading a pickle.

    :return: Whatever ``nltk.classify.maxent.maxent_pos_tagger()`` returns.
    """
    # NLTK's classifier package is ``nltk.classify``; there is no
    # ``nltk.classifier`` package, so importing from it would raise
    # ModuleNotFoundError.
    # NOTE(review): the import is kept function-local, presumably to avoid a
    # circular import with nltk.data -- confirm before hoisting it.
    from nltk.classify.maxent import maxent_pos_tagger

    return maxent_pos_tagger()
704+
705+
706+
def switch_p_tagger(lang):
    """
    Return a pickle-free Averaged Perceptron Tagger instead of loading a pickle.

    :param lang: Language suffix taken from the requested pickle name.
        Only ``"ru"`` is recognised; it is translated to ``"rus"``.
        Every other value is replaced by ``None`` before the lookup.
    :return: Whatever ``nltk.tag._get_tagger`` returns for the mapped code.
    """
    from nltk.tag import _get_tagger

    # Map the pickle-name suffix onto the code handed to _get_tagger.
    tagger_lang = "rus" if lang == "ru" else None
    return _get_tagger(tagger_lang)
718+
719+
684720
def load(
685721
resource_url,
686722
format="auto",
@@ -771,6 +807,12 @@ def load(
771807
fil = os.path.split(path_[:-7])[-1]
772808
if path_.startswith("tokenizers/punkt"):
773809
return switch_punkt(fil)
810+
elif path_.startswith("chunkers/maxent_ne_chunker"):
811+
return switch_chunker(fil.split("_")[-1])
812+
elif path_.startswith("taggers/maxent_treebank_pos_tagger"):
813+
return switch_t_tagger()
814+
elif path_.startswith("taggers/averaged_perceptron_tagger"):
815+
return switch_p_tagger(fil.split("_")[-1])
774816

775817
# Let the user know what's going on.
776818
if verbose:

0 commit comments

Comments
 (0)