File tree Expand file tree Collapse file tree 4 files changed +11
-0
lines changed
Expand file tree Collapse file tree 4 files changed +11
-0
lines changed Original file line number Diff line number Diff line change @@ -363,4 +363,7 @@ pyonmttok.is_placeholder(token: str)
363363
364364# Sets the random seed for reproducible tokenization.
365365pyonmttok.set_random_seed(seed: int )
366+
367+ # Checks if the language code is valid.
368+ pyonmttok.is_valid_language(lang: str )
366369```
Original file line number Diff line number Diff line change @@ -495,6 +495,7 @@ static onmt::Vocab create_vocab(const std::optional<std::vector<std::string>>& s
495495PYBIND11_MODULE (_ext, m)
496496{
497497 m.def (" is_placeholder" , &onmt::Tokenizer::is_placeholder, py::arg (" token" ));
498+ m.def (" is_valid_language" , &onmt::unicode::is_valid_language, py::arg (" lang" ));
498499 m.def (" set_random_seed" , &onmt::set_random_seed, py::arg (" seed" ));
499500
500501 py::enum_<onmt::Casing>(m, " Casing" )
Original file line number Diff line number Diff line change 2929 TokenType ,
3030 Vocab ,
3131 is_placeholder ,
32+ is_valid_language ,
3233 set_random_seed ,
3334)
3435from pyonmttok .version import __version__
Original file line number Diff line number Diff line change @@ -17,6 +17,12 @@ def test_is_placeholder():
1717 assert pyonmttok .is_placeholder ("⦅hello⦆" )
1818
1919
20+ def test_is_valid_language ():
21+ assert pyonmttok .is_valid_language ("fr" )
22+ assert pyonmttok .is_valid_language ("de" )
23+ assert not pyonmttok .is_valid_language ("xx" )
24+
25+
2026def test_simple ():
2127 tokenizer = pyonmttok .Tokenizer ("aggressive" , joiner_annotate = True , joiner_new = True )
2228 text = "Hello World!"
You can’t perform that action at this time.
0 commit comments