Skip to content

Commit fdde90b

Browse files
committed
Integrate changes from finalfusion-rust.
Temporarily change the finalfusion-rust dependency to the GitHub repository. Report missing words in failed analogy queries and rename the similarity method to word_similarity.
1 parent 696007f commit fdde90b

File tree

4 files changed

+22
-12
lines changed

4 files changed

+22
-12
lines changed

Cargo.lock

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@ version = "0.7"
1919
features = ["extension-module"]
2020

2121
[dependencies]
22+
itertools = "0.8"
2223
failure = "0.1"
23-
finalfusion = "0.7"
24+
finalfusion = { git = "https://github.com/finalfusion/finalfusion-rust.git", rev = "8e360c6" }
2425
libc = "0.2"
2526
ndarray = "0.12"
2627
numpy = "0.6"

src/embeddings.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use failure::Error;
77
use finalfusion::metadata::Metadata;
88
use finalfusion::prelude::*;
99
use finalfusion::similarity::*;
10+
use itertools::Itertools;
1011
use ndarray::Array2;
1112
use numpy::{IntoPyArray, PyArray1, PyArray2};
1213
use pyo3::class::iter::PyIterProtocol;
@@ -85,11 +86,17 @@ impl PyEmbeddings {
8586
}
8687
};
8788

88-
let results =
89-
match embeddings.analogy_masked(word1, word2, word3, limit, [mask.0, mask.1, mask.2]) {
90-
Some(results) => results,
91-
None => return Err(exceptions::KeyError::py_err("Unknown word or n-grams")),
92-
};
89+
let results = embeddings
90+
.analogy_masked([word1, word2, word3], [mask.0, mask.1, mask.2], limit)
91+
.map_err(|lookup| {
92+
let failed = [word1, word2, word3]
93+
.iter()
94+
.zip(lookup.iter())
95+
.filter(|(_, success)| !*success)
96+
.map(|(word, _)| word)
97+
.join(" ");
98+
exceptions::KeyError::py_err(format!("Unknown word or n-grams: {}", failed))
99+
})?;
93100

94101
let mut r = Vec::with_capacity(results.len());
95102
for ws in results {
@@ -219,7 +226,7 @@ impl PyEmbeddings {
219226
}
220227
};
221228

222-
let results = match embeddings.similarity(word, limit) {
229+
let results = match embeddings.word_similarity(word, limit) {
223230
Some(results) => results,
224231
None => return Err(exceptions::KeyError::py_err("Unknown word and n-grams")),
225232
};

tests/test_analogy.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,5 @@ def test_analogies(analogy_fifu):
8888
with pytest.raises(ValueError):
8989
analogy_fifu.analogy("Paris", "Frankreich", "Paris",
9090
1, (True, True, True, True))
91+
with pytest.raises(KeyError):
92+
analogy_fifu.analogy("Paris", "OOV", "Paris", 1, (True, True, True))

0 commit comments

Comments
 (0)