Skip to content

Commit f685ed6

Browse files
committed
Added a distance method to the Glove class to measure the distance between two arbitrary words.
1 parent 4cd5ffd commit f685ed6

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

glove/glove.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,37 @@ def most_similar(self, word, number=5):
227227

228228
return self._similarity_query(self.word_vectors[word_idx], number)[1:]
229229

230+
def _distance(self, word1_vec, word2_vec):
    """
    Return the cosine similarity of two word vectors.

    Despite the name, the result is a similarity rather than a distance:
    the dot product of the two vectors divided by each of their norms.
    For 1-D inputs, vectors pointing the same way give 1.0, orthogonal
    vectors give 0.0 and opposite vectors give -1.0.

    NOTE(review): nothing here guards against a zero-norm vector, which
    makes the division a division by zero -- confirm callers never pass
    one.
    """
    # Divide by the norms one at a time, keeping the original order of
    # operations so results stay numerically identical.
    dst = (np.dot(word1_vec, word2_vec)
           / np.linalg.norm(word1_vec)
           / np.linalg.norm(word2_vec))

    return dst
236+
237+
def distance(self, word1, word2):
    """
    Return the cosine similarity between word1 and word2.

    Despite the method name, the value is whatever ``_distance`` computes
    for the two word vectors, which is a cosine similarity (larger means
    more alike), not a metric distance.

    Both words are looked up in ``self.dictionary`` to obtain their row
    indices into ``self.word_vectors``.

    Raises:
        Exception: if ``self.word_vectors`` is None (model not fit), if
            ``self.dictionary`` is None, or if either word is missing
            from the dictionary.
    """

    if self.word_vectors is None:
        raise Exception('Model must be fit before querying')

    if self.dictionary is None:
        raise Exception('No word dictionary supplied')

    # Both lookups fail the same way, so one handler covers them.
    try:
        word1_idx = self.dictionary[word1]
        word2_idx = self.dictionary[word2]
    except KeyError:
        raise Exception('Word not in dictionary')

    return self._distance(self.word_vectors[word1_idx],
                          self.word_vectors[word2_idx])
260+
230261
def most_similar_paragraph(self, paragraph, number=5, **kwargs):
231262
"""
232263
Return words most similar to a given paragraph (iterable of tokens).

0 commit comments

Comments
 (0)