Skip to content

Commit 1edabfe

Browse files
authored
Fix for nltk#3294
1 parent 27e49f7 commit 1edabfe

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

nltk/tokenize/punkt.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1760,13 +1760,13 @@ def load_punkt_params(lang_dir):
1760 | 1760 |      pdec = PunktDecoder()
1761 | 1761 |      # Make a new Parameters object:
1762 | 1762 |      params = PunktParameters()
1763 |      | -    with open(f"{lang_dir}/collocations.tab") as f:
     | 1763 | +    with open(f"{lang_dir}/collocations.tab", "r", encoding="utf-8") as f:
1764 | 1764 |          params.collocations = pdec.tab2tups(f)
1765 |      | -    with open(f"{lang_dir}/sent_starters.txt") as f:
     | 1765 | +    with open(f"{lang_dir}/sent_starters.txt", "r", encoding="utf-8") as f:
1766 | 1766 |          params.sent_starters = pdec.txt2set(f)
1767 |      | -    with open(f"{lang_dir}/abbrev_types.txt") as f:
     | 1767 | +    with open(f"{lang_dir}/abbrev_types.txt", "r", encoding="utf-8") as f:
1768 | 1768 |          params.abbrev_types = pdec.txt2set(f)
1769 |      | -    with open(f"{lang_dir}/ortho_context.tab") as f:
     | 1769 | +    with open(f"{lang_dir}/ortho_context.tab", "r", encoding="utf-8") as f:
1770 | 1770 |          params.ortho_context = pdec.tab2intdict(f)
1771 | 1771 |      return params
1772 | 1772 |
0 commit comments

Comments
 (0)