# database.py
import hashlib

import chromadb
from chromadb.utils import embedding_functions
# Chroma client whose data lives under the local ./db directory.
client = chromadb.PersistentClient(path="./db")

# Embedding function handed to the collection below.
embedding_fn = embedding_functions.DefaultEmbeddingFunction()

# Reuse the "translation_memory" collection if it exists; otherwise create it.
collection = client.get_or_create_collection(
    embedding_function=embedding_fn,
    name="translation_memory",
)
def add_translation_pair(source_language, target_language, sentence, translation):
    """Store a translation pair with language metadata.

    The source sentence is stored as the document; the languages and the
    translation are stored in the record's metadata.

    Args:
        source_language: Language of ``sentence``.
        target_language: Language of ``translation``.
        sentence: Source-language text.
        translation: Target-language text for ``sentence``.
    """
    # The built-in hash() of a str is salted per process (PYTHONHASHSEED), so
    # it yields a different ID for the same sentence on every run. A content
    # digest keeps IDs reproducible across runs of the persistent store.
    sentence_digest = hashlib.sha256(sentence.encode("utf-8")).hexdigest()
    collection.add(
        documents=[sentence],
        metadatas=[{
            "source_language": source_language,
            "target_language": target_language,
            "translation": translation,
        }],
        ids=[f"{source_language}_{target_language}_{sentence_digest}"],
    )
def query_similar_pairs(source_language, target_language, query_sentence, *, n_results=4):
    """Retrieve stored translation pairs similar to ``query_sentence``.

    Only records whose metadata matches both ``source_language`` and
    ``target_language`` are considered.

    Args:
        source_language: Required ``source_language`` metadata value.
        target_language: Required ``target_language`` metadata value.
        query_sentence: Text to search for similar stored sentences.
        n_results: Maximum number of pairs to request (keyword-only,
            defaults to 4).

    Returns:
        A list of ``{"source": <stored sentence>, "target": <translation>}``
        dicts; empty when the query returns no documents.
    """
    results = collection.query(
        query_texts=[query_sentence],
        n_results=n_results,
        where={
            "$and": [
                {"source_language": source_language},
                {"target_language": target_language},
            ]
        },
    )

    documents = results["documents"]
    if not documents:
        return []

    # Exactly one query text was sent, so its matches are in the first entry.
    return [
        {"source": doc, "target": meta["translation"]}
        for doc, meta in zip(documents[0], results["metadatas"][0])
    ]