File tree Expand file tree Collapse file tree 1 file changed +22
-0
lines changed Expand file tree Collapse file tree 1 file changed +22
-0
lines changed Original file line number Diff line number Diff line change @@ -667,6 +667,20 @@ def restricted_pickle_load(string):
667667 return RestrictedUnpickler (BytesIO (string )).load ()
668668
669669
def switch_punkt(lang="english"):
    """
    Build a pickle-free Punkt tokenizer for *lang* rather than unpickling one.

    :param lang: language name handed to the ``PunktTokenizer`` constructor
        (defaults to ``"english"``).
    :return: the newly constructed ``PunktTokenizer`` instance.

    >>> import nltk
    >>> tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    >>> print(tokenizer.tokenize("Hello! How are you?"))
    ['Hello!', 'How are you?']
    """
    # Function-scope import, as in the original; presumably this avoids an
    # import cycle with nltk.tokenize -- confirm before hoisting it.
    from nltk.tokenize import PunktTokenizer

    punkt_tokenizer = PunktTokenizer(lang)
    return punkt_tokenizer
682+
683+
670684def load (
671685 resource_url ,
672686 format = "auto" ,
@@ -750,6 +764,14 @@ def load(
750764 print (f"<<Using cached copy of { resource_url } >>" )
751765 return resource_val
752766
767+ resource_url = normalize_resource_url (resource_url )
768+ protocol , path_ = split_resource_url (resource_url )
769+
770+ if path_ [- 7 :] == ".pickle" :
771+ fil = os .path .split (path_ [:- 7 ])[- 1 ]
772+ if path_ .startswith ("tokenizers/punkt" ):
773+ return switch_punkt (fil )
774+
753775 # Let the user know what's going on.
754776 if verbose :
755777 print (f"<<Loading { resource_url } >>" )
You can’t perform that action at this time.
0 commit comments