Skip to content

Commit 45e8ff9

Browse files
committed
Expose lossy read methods.
1 parent 8bb481f commit 45e8ff9

File tree

1 file changed

+40
-12
lines changed

1 file changed

+40
-12
lines changed

src/embeddings.rs

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,28 +60,42 @@ impl PyEmbeddings {
6060
Ok(())
6161
}
6262

63-
/// read_fasttext(path,/)
63+
/// read_fasttext(path, /, lossy=False)
6464
/// --
6565
///
6666
/// Read embeddings in the fasttext format.
67+
///
68+
/// Lossy decoding of the words can be enabled through the `lossy` parameter, which defaults to false.
6769
#[staticmethod]
68-
fn read_fasttext(path: &str) -> PyResult<PyEmbeddings> {
69-
read_non_fifu_embeddings(path, |r| Embeddings::read_fasttext(r))
70+
#[args(lossy = false)]
71+
fn read_fasttext(path: &str, lossy: bool) -> PyResult<PyEmbeddings> {
72+
if lossy {
73+
read_non_fifu_embeddings(path, |r| Embeddings::read_fasttext_lossy(r))
74+
} else {
75+
read_non_fifu_embeddings(path, |r| Embeddings::read_fasttext(r))
76+
}
7077
}
7178

72-
/// read_text(path,/)
79+
/// read_text(path, /, lossy=False)
7380
/// --
7481
///
7582
/// Read embeddings in text format. This format uses one line per
7683
/// embedding. Each line starts with the word in UTF-8, followed
7784
/// by its vector components encoded in ASCII. The word and its
7885
/// components are separated by spaces.
86+
///
87+
/// Lossy decoding of the words can be enabled through the `lossy` parameter, which defaults to false.
7988
#[staticmethod]
80-
fn read_text(path: &str) -> PyResult<PyEmbeddings> {
81-
read_non_fifu_embeddings(path, |r| Embeddings::read_text(r))
89+
#[args(lossy = false)]
90+
fn read_text(path: &str, lossy: bool) -> PyResult<PyEmbeddings> {
91+
if lossy {
92+
read_non_fifu_embeddings(path, |r| Embeddings::read_text_lossy(r))
93+
} else {
94+
read_non_fifu_embeddings(path, |r| Embeddings::read_text(r))
95+
}
8296
}
8397

84-
/// read_text_dims(path,/)
98+
/// read_text_dims(path, /, lossy=False)
8599
/// --
86100
///
87101
/// Read embeddings in text format with dimensions. In this format,
@@ -91,18 +105,32 @@ impl PyEmbeddings {
91105
/// one line per embedding. Each line starts with the word in UTF-8,
92106
/// followed by its vector components encoded in ASCII. The word and
93107
/// its components are separated by spaces.
108+
///
109+
/// Lossy decoding of the words can be enabled through the `lossy` parameter, which defaults to false.
94110
#[staticmethod]
95-
fn read_text_dims(path: &str) -> PyResult<PyEmbeddings> {
96-
read_non_fifu_embeddings(path, |r| Embeddings::read_text_dims(r))
111+
#[args(lossy = false)]
112+
fn read_text_dims(path: &str, lossy: bool) -> PyResult<PyEmbeddings> {
113+
if lossy {
114+
read_non_fifu_embeddings(path, |r| Embeddings::read_text_dims_lossy(r))
115+
} else {
116+
read_non_fifu_embeddings(path, |r| Embeddings::read_text_dims(r))
117+
}
97118
}
98119

99-
/// read_word2vec(path,/)
120+
/// read_word2vec(path, /, lossy=False)
100121
/// --
101122
///
102123
/// Read embeddings in the word2vec binary format.
124+
///
125+
/// Lossy decoding of the words can be enabled through the `lossy` parameter, which defaults to false.
103126
#[staticmethod]
104-
fn read_word2vec(path: &str) -> PyResult<PyEmbeddings> {
105-
read_non_fifu_embeddings(path, |r| Embeddings::read_word2vec_binary(r))
127+
#[args(lossy = false)]
128+
fn read_word2vec(path: &str, lossy: bool) -> PyResult<PyEmbeddings> {
129+
if lossy {
130+
read_non_fifu_embeddings(path, |r| Embeddings::read_word2vec_binary_lossy(r))
131+
} else {
132+
read_non_fifu_embeddings(path, |r| Embeddings::read_word2vec_binary(r))
133+
}
106134
}
107135

108136
/// Get the model's vocabulary.

0 commit comments

Comments
 (0)