|
2 | 2 | # |
3 | 3 | # Copyright (C) 2001-2024 NLTK Project |
4 | 4 | # Author: Edward Loper <[email protected]> |
| 5 | +# Author: ekaf (Restricting and switching pickles) |
5 | 6 | # URL: <https://www.nltk.org/> |
6 | 7 | # For license information, see LICENSE.TXT |
7 | 8 |
|
@@ -681,6 +682,41 @@ def switch_punkt(lang="english"): |
681 | 682 | return tok(lang) |
682 | 683 |
|
683 | 684 |
|
| 685 | +def switch_chunker(fmt="multiclass"): |
| 686 | + """ |
| 687 | + Return a pickle-free Named Entity Chunker instead of loading a pickle. |
| 688 | +
|
| 689 | + :param fmt: passed through unchanged to ``ne_chunker``. |
| 690 | + """ |
| 691 | + from nltk.chunk import ne_chunker |
| 692 | + |
| 693 | + return ne_chunker(fmt) |
| 694 | + |
| 695 | + |
| 696 | +def switch_t_tagger(): |
| 697 | + """ |
| 698 | + Return a pickle-free Treebank POS Tagger instead of loading a pickle. |
| 699 | +
|
| 700 | + """ |
| 701 | + from nltk.classify.maxent import maxent_pos_tagger |
| 702 | + |
| 703 | + return maxent_pos_tagger() |
| 704 | + |
| 705 | + |
| 706 | +def switch_p_tagger(lang): |
| 707 | + """ |
| 708 | + Return a pickle-free Averaged Perceptron Tagger instead of loading a pickle. |
| 709 | + :param lang: "ru" is mapped to "rus"; any other value is replaced by None before calling ``_get_tagger``. |
| 710 | + """ |
| 711 | + from nltk.tag import _get_tagger |
| 712 | + |
| 713 | + if lang == "ru": |
| 714 | + lang = "rus" |
| 715 | + else: |
| 716 | + lang = None |
| 717 | + return _get_tagger(lang) |
| 718 | + |
| 719 | + |
684 | 720 | def load( |
685 | 721 | resource_url, |
686 | 722 | format="auto", |
@@ -771,6 +807,12 @@ def load( |
771 | 807 | fil = os.path.split(path_[:-7])[-1] |
772 | 808 | if path_.startswith("tokenizers/punkt"): |
773 | 809 | return switch_punkt(fil) |
| 810 | + elif path_.startswith("chunkers/maxent_ne_chunker"): |
| 811 | + return switch_chunker(fil.split("_")[-1]) |
| 812 | + elif path_.startswith("taggers/maxent_treebank_pos_tagger"): |
| 813 | + return switch_t_tagger() |
| 814 | + elif path_.startswith("taggers/averaged_perceptron_tagger"): |
| 815 | + return switch_p_tagger(fil.split("_")[-1]) |
774 | 816 |
|
775 | 817 | # Let the user know what's going on. |
776 | 818 | if verbose: |
|
0 commit comments