forked from OSU-NLP-Group/HippoRAG
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtests_local.py
More file actions
114 lines (94 loc) · 4.35 KB
/
tests_local.py
File metadata and controls
114 lines (94 loc) · 4.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
from typing import List
import json
import argparse
import logging
from src.hipporag import HippoRAG
def main():
    """End-to-end smoke test for HippoRAG: index, retrieve/QA, incremental index, delete.

    Exercises the same query set against three HippoRAG configurations:
      1. a locally served LLM behind an OpenAI-compatible base URL,
      2. an Azure OpenAI deployment (reusing the same save_dir),
      3. the Azure deployment again, with incremental indexing followed by
         deletion of the newly added documents.

    All output goes to stdout; only the last two elements of each ``rag_qa``
    result (the evaluation summaries) are printed.
    """
    # Prepare datasets and evaluation.
    # NOTE: these strings are index keys — hipporag.delete() below relies on
    # exact string matches, so do not "fix" typos/punctuation in them.
    docs = [
        "Oliver Badman is a politician.",
        "George Rankin is a politician.",
        "Thomas Marwick is a politician.",
        "Cinderella attended the royal ball.",
        "The prince used the lost glass slipper to search the kingdom.",
        "When the slipper fit perfectly, Cinderella was reunited with the prince.",
        "Erik Hort's birthplace is Montebello.",
        "Marina is bom in Minsk.",
        "Montebello is a part of Rockland County."
    ]

    save_dir = 'outputs/local_test'  # Each LLM/embedding model combination creates its own subdirectory here.
    llm_model_name = 'meta-llama/Llama-3.1-8B-Instruct'  # Served via the local OpenAI-compatible endpoint below.
    embedding_model_name = 'nvidia/NV-Embed-v2'  # Embedding model name (NV-Embed, GritLM or Contriever for now).

    # Separate Retrieval & QA
    queries = [
        "What is George Rankin's occupation?",
        "How did Cinderella reach her happy ending?",
        "What county is Erik Hort's birthplace a part of?"
    ]
    # For Evaluation
    answers = [
        ["Politician"],
        ["By going to the ball."],
        ["Rockland County"]
    ]
    gold_docs = [
        ["George Rankin is a politician."],
        ["Cinderella attended the royal ball.",
         "The prince used the lost glass slipper to search the kingdom.",
         "When the slipper fit perfectly, Cinderella was reunited with the prince."],
        ["Erik Hort's birthplace is Montebello.",
         "Montebello is a part of Rockland County."]
    ]

    def run_qa(rag):
        # Print only the evaluation summaries (the last two elements of rag_qa's return).
        print(rag.rag_qa(queries=queries,
                         gold_docs=gold_docs,
                         gold_answers=answers)[-2:])

    # 1) Startup a HippoRAG instance against a local OpenAI-compatible server.
    hipporag = HippoRAG(save_dir=save_dir,
                        llm_model_name=llm_model_name,
                        embedding_model_name=embedding_model_name,
                        llm_base_url="http://localhost:6578/v1"
                        )
    # Run indexing
    hipporag.index(docs=docs)
    run_qa(hipporag)

    # Shared Azure OpenAI endpoints for the remaining configurations.
    azure_endpoint = "https://bernal-hipporag.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2025-01-01-preview"
    azure_embedding_endpoint = "https://bernal-hipporag.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2023-05-15"

    # 2) Startup a HippoRAG instance against Azure OpenAI (same save_dir, so the index is reused).
    hipporag = HippoRAG(save_dir=save_dir,
                        llm_model_name=llm_model_name,
                        embedding_model_name=embedding_model_name,
                        azure_endpoint=azure_endpoint,
                        azure_embedding_endpoint=azure_embedding_endpoint
                        )
    run_qa(hipporag)

    # 3) Fresh instance: incremental indexing, QA, then deletion of the new docs.
    hipporag = HippoRAG(save_dir=save_dir,
                        llm_model_name=llm_model_name,
                        embedding_model_name=embedding_model_name,
                        azure_endpoint=azure_endpoint,
                        azure_embedding_endpoint=azure_embedding_endpoint
                        )
    new_docs = [
        "Tom Hort's birthplace is Montebello.",
        "Sam Hort's birthplace is Montebello.",
        "Bill Hort's birthplace is Montebello.",
        "Cam Hort's birthplace is Montebello.",
        "Montebello is a part of Rockland County.."]
    # Run incremental indexing
    hipporag.index(docs=new_docs)
    run_qa(hipporag)

    # Delete exactly the documents added above (delete matches on exact strings,
    # so we reuse new_docs rather than duplicating the list).
    hipporag.delete(new_docs)
    run_qa(hipporag)
# Script entry point: run the full smoke test when executed directly.
if __name__ == "__main__":
    main()