1 file changed: +14 -1 lines changed
Original file line number | Diff line number | Diff line change
59 59 For further information, please see Chapter 3 of the NLTK book.
6060"""
6161
62+ import functools
6263import re
6364
6465from nltk .data import load
9293from nltk .tokenize .util import regexp_span_tokenize , string_span_tokenize
9394
9495
96+ @functools .lru_cache
97+ def _get_punkt_tokenizer (language = "english" ):
98+ """
99+ Return a PunktTokenizer for the given language, memoized
100+ with an LRU cache so repeated calls reuse the instance.
101+
102+ :param language: the model name in the Punkt corpus
103+ :type language: str
104+ """
105+ return PunktTokenizer (language )
106+
107+
95108# Standard sentence tokenizer.
96109def sent_tokenize (text , language = "english" ):
97110 """
@@ -103,7 +116,7 @@ def sent_tokenize(text, language="english"):
103116 :param text: text to split into sentences
104117 :param language: the model name in the Punkt corpus
105118 """
106- tokenizer = PunktTokenizer (language )
119+ tokenizer = _get_punkt_tokenizer (language )
107120 return tokenizer .tokenize (text )
108121
109122
You can’t perform that action at this time.
0 commit comments