forked from microsoft/agent-lightning
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwiki_retriever_mcp.py
More file actions
52 lines (38 loc) · 1.41 KB
/
wiki_retriever_mcp.py
File metadata and controls
52 lines (38 loc) · 1.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Copyright (c) Microsoft. All rights reserved.
# type: ignore
import pickle
import faiss
from fastmcp import FastMCP
from sentence_transformers import SentenceTransformer
# Everything below runs once at import time, so the server starts with the
# FAISS index, the embedding model and the chunk store already in memory.
_INDEX_FILE = "data/index_hnsw_faiss_n32e40_tiny.index"
_EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"
# Alternate chunk file kept for reference: /mnt/input/agent_lightning/nq_list.pkl
_CHUNKS_FILE = "data/chunks_candidate_tiny.pkl"

# Vector index that `retrieve` searches.
index = faiss.read_index(_INDEX_FILE)
print("Index loaded successfully.")

# Sentence encoder used to embed incoming queries.
model = SentenceTransformer(_EMBEDDING_MODEL)
print("Model loaded successfully.")

# Chunk store; `retrieve` indexes into it with the ids returned by the index.
# NOTE: unpickling can execute arbitrary code, so only load a trusted file here.
with open(_CHUNKS_FILE, "rb") as chunk_file:
    chunks = pickle.load(chunk_file)
print("Chunks loaded successfully.")

# MCP server instance that the tool functions below register with.
mcp = FastMCP(name="wiki retrieval mcp")
@mcp.tool(
    name="retrieve",
    description="retrieve relevant chunks from the wikipedia",
)
def retrieve(query: str) -> list:
    """
    Look up the stored chunks closest to a query in the vector index.

    The query is embedded with the module-level ``model`` (normalized
    embeddings), searched against the module-level ``index``, and each
    returned id is used to look up its entry in ``chunks``.

    Args:
        query (str): The query string to embed and search with.

    Returns:
        list: One dictionary per hit with the keys ``chunk`` (the entry taken
            from ``chunks``), ``chunk_id`` (the id as a plain ``int``) and
            ``distance`` (the value reported by the index as a ``float``).
            Result slots whose id is ``-1`` are skipped, so the list may be
            shorter than ``top_k``.
    """
    top_k = 1  # Number of nearest neighbours requested from the index
    query_vec = model.encode([query], normalize_embeddings=True)
    distances, ids = index.search(query_vec, top_k)
    # A single query was submitted, so row 0 holds all of its hits.
    return [
        {"chunk": chunks[hit_id], "chunk_id": int(hit_id), "distance": float(dist)}
        for hit_id, dist in zip(ids[0], distances[0])
        if hit_id != -1
    ]
if __name__ == "__main__":
    # Start the MCP server over the SSE transport, bound to the loopback
    # address so it is reachable from this machine only.
    mcp.run(
        transport="sse",
        host="127.0.0.1",
        port=8099,
    )