Skip to content

Commit f402781

Browse files
authored
Merge pull request nltk#3296 from soras/patch-1
Fix for nltk#3294
2 parents 60b9a66 + 5aa47eb commit f402781

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

nltk/tokenize/punkt.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1760,13 +1760,13 @@ def load_punkt_params(lang_dir):
1760 1760
pdec = PunktDecoder()
1761 1761
# Make a new Parameters object:
1762 1762
params = PunktParameters()
1763-
with open(f"{lang_dir}/collocations.tab") as f:
1763+
with open(f"{lang_dir}/collocations.tab", encoding="utf-8") as f:
1764 1764
params.collocations = pdec.tab2tups(f)
1765-
with open(f"{lang_dir}/sent_starters.txt") as f:
1765+
with open(f"{lang_dir}/sent_starters.txt", encoding="utf-8") as f:
1766 1766
params.sent_starters = pdec.txt2set(f)
1767-
with open(f"{lang_dir}/abbrev_types.txt") as f:
1767+
with open(f"{lang_dir}/abbrev_types.txt", encoding="utf-8") as f:
1768 1768
params.abbrev_types = pdec.txt2set(f)
1769-
with open(f"{lang_dir}/ortho_context.tab") as f:
1769+
with open(f"{lang_dir}/ortho_context.tab", encoding="utf-8") as f:
1770 1770
params.ortho_context = pdec.tab2intdict(f)
1771 1771
return params
1772 1772

0 commit comments

Comments
 (0)