Skip to content

Commit a2d8399

Browse files
committed
Updated rag-tester and web crawler
1 parent 1378c4e commit a2d8399

File tree

4 files changed

+30
-6
lines changed

4 files changed

+30
-6
lines changed

examples/8-url-loader.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@
1111
import os
1212

1313
db = MemGraphClient(
14-
host=os.environ.get("MEMGRAPH_URI", "localhost"),
14+
host="localhost" or os.environ.get("MEMGRAPH_URI", "localhost"),
1515
port=int(os.environ.get("MEMGRAPH_PORT", 7687)),
1616
username=os.environ.get("MEMGRAPH_USERNAME", "memgraph"),
1717
password=os.environ.get("MEMGRAPH_PASSWORD", "memgraph"),
1818
)
1919
db.connect()
2020

21+
print("Connected to Memgraph", db.host, db.port)
22+
2123
loader = WebLoader("http://localhost:4000/en/enterprise-cloud@latest/copilot")
2224
embedder = TextEmbedding3Small()
2325

@@ -26,6 +28,12 @@
2628
)
2729

2830
def store(source, doc, chunks, vectors):
31+
print("### Storing data in Memgraph")
32+
print("Source:", source)
33+
print("Document:", doc)
34+
print("Chunks:", len(chunks))
35+
print("Vectors:", len(vectors))
36+
2937
db.create_source(source)
3038
db.create_document(doc)
3139
for chunk in chunks:
@@ -35,6 +43,8 @@ def store(source, doc, chunks, vectors):
3543
vector.vector_store_id = vector_store.id
3644
db.create_vector(vector)
3745

46+
print("### Data stored successfully")
47+
3848
async def main():
3949
for source, doc, chunks in loader.load_data():
4050
vectors = []

examples/9-test.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
# Define enhanced system role with instructions on using all available tools
2323
system_role = f"""
24-
You are an expert on everything GitHub.
24+
You are a helpful assistant.
2525
Your Name is Agent Smith.
2626
2727
Today is {date.today().strftime("%d %B %Y")}.
@@ -53,11 +53,11 @@ async def run_conversation(user_prompt: str, rag_prompt = None) -> str:
5353
raise ValueError(f"AZURE_OPENAI_API_KEY environment variable is required")
5454

5555
db = MemGraphClient(
56-
host=os.environ.get("MEMGRAPH_URI", "localhost"),
56+
host="localhost" or os.environ.get("MEMGRAPH_URI", "localhost"),
5757
port=int(os.environ.get("MEMGRAPH_PORT", 7687)),
5858
username=os.environ.get("MEMGRAPH_USERNAME", "memgraph"),
5959
password=os.environ.get("MEMGRAPH_PASSWORD", "memgraph"),
60-
)
60+
).connect()
6161

6262
embedder = TextEmbedding3Small()
6363

@@ -129,15 +129,26 @@ async def test_vector_search(query_text: str):
129129
"references": <list of references to other sources>
130130
}}
131131
]
132-
I need you to always ground your response in this information and return relevant sources and references.
132+
You always HAVE TO ground your response in this information.
133+
You always HAVE TO return the source, where the information is coming from, at the end of your response.
134+
Use the following format:
135+
136+
# Sources:
137+
1. <Source Name>: <Source URI>
138+
2. <Source Name>: <Source URI>
139+
...
140+
141+
It is totally fine to only use 1 source, but you have to mention it.
142+
If you don't know the answer, say "I don't know".
133143
Here is the information You have:\n
134144
"""
135145

136146
await run_conversation(query_text, f"{rag_prompt}{json.dumps(data)}")
137147

138148

139149
async def main():
140-
text = input("Enter your text: ")
150+
#text = input("Enter your text: ")
151+
text = "Tell me what the multipliers for premium requests are, based on the Model for GitHub Copilot!"
141152
await test_vector_search(text)
142153
#await run_conversation(text)
143154
print("Test completed.")

src/core/rag/dbhandler/memgraph.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,6 @@ def connect(self, *args, **kwargs) -> None:
4242
self._conn.autocommit = True
4343
self._cur = self._conn.cursor()
4444
print(f"Connected successfully to Memgraph")
45+
46+
return self
4547

src/libs/dataloader/web.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def _visit_site(self, url: str, retry = 3) -> str:
116116
except Exception as e:
117117
if retry <= 0:
118118
raise ValueError(f"Failed to fetch content from {url}: {str(e)}")
119+
print(f"Error fetching {url}: {str(e)}, Retrying... ({retry - 1} attempts left)")
119120
return self._visit_site(url, retry - 1)
120121

121122
def load_data(self) -> Generator[Tuple[Source, Document, List[DocumentChunk]], None, None]:

0 commit comments

Comments
 (0)