Skip to content

Commit 754c10c

Browse files
authored
Merge pull request nltk#3080 from tomaarsen/enhancement/lm_warn_vocab
Throw warning when `LanguageModel` is initialized with incorrect vocabulary
2 parents 960a45e + 8bd05b3 commit 754c10c

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

nltk/lm/api.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
"""Language Model Interface."""
88

99
import random
10+
import warnings
1011
from abc import ABCMeta, abstractmethod
1112
from bisect import bisect
1213
from itertools import accumulate
@@ -83,14 +84,20 @@ def __init__(self, order, vocabulary=None, counter=None):
8384
of creating a new one when training.
8485
:type vocabulary: `nltk.lm.Vocabulary` or None
8586
:param counter: If provided, use this object to count ngrams.
86-
:type vocabulary: `nltk.lm.NgramCounter` or None
87+
:type counter: `nltk.lm.NgramCounter` or None
8788
:param ngrams_fn: If given, defines how sentences in training text are turned to ngram
8889
sequences.
8990
:type ngrams_fn: function or None
9091
:param pad_fn: If given, defines how sentences in training text are padded.
9192
:type pad_fn: function or None
9293
"""
9394
self.order = order
95+
if vocabulary and not isinstance(vocabulary, Vocabulary):
96+
warnings.warn(
97+
f"The `vocabulary` argument passed to {self.__class__.__name__!r} "
98+
"must be an instance of `nltk.lm.Vocabulary`.",
99+
stacklevel=3,
100+
)
94101
self.vocab = Vocabulary() if vocabulary is None else vocabulary
95102
self.counts = NgramCounter() if counter is None else counter
96103

0 commit comments

Comments
 (0)