Skip to content

Commit c1024a4

Browse files
Kannav02 and luarss authored
Integration of MongoDB for feedbacks - Revised (#107)
* (fix) changed the dockerfile to include common directory
* (fix)
  - schema corrected for the database
  - parameters included in the main submit_feedback function
  - insertion corrected to utilise the correct datetime.now() function
* (feat) included pymongo as one of the requirements
* (feat)
  - added the function to now submit feedback back to mongoDB
  - corrected the sys path to now include common as a package, workaround, kind of like a pseudopackage
* (chore) modified the frontend part for docker-compose
* (fix) frontend context and build settings modified
* (fix) dockerfile path changed so it can be used
* fix: schema fixed and irrelevant fields removed
* feat: added ContextSource class
* fix: utilised ContextSource class to have more cohesiveness between source and chunks
* fix: made it compatible to ContextSources
* feat: added import for ContextSources
* fix: endpoint changed to normal
* fix: content sources now adapted in the schema for mongo client
* fix: fixed feedback for context sources
* fix: context source for feedback functions are now being used
* fix: linting issues
* fix phony targets makefile
* fix ci
* fix lints, redirect mock_endpoint to agent-retriever
* add agent retriever fixes and fast mode testing
* simplify: remove google sheets for feedback, shift mongo utils into frontend
* cleanup mongo_client
* remove RAG_VERSION
* frontend: remove redundant -> None
* add types
* feat: added documentation for mongoDB integration
* Update frontend/README.md
* fix: added db hosting instructions
* clarify docs
* fix: environment var issue resolved
* chore: add minor cleanups
* fix: linting issues

---------

Signed-off-by: Kannav02 <[email protected]>
Signed-off-by: Kannav Sethi <[email protected]>
Co-authored-by: Jack Luar <[email protected]>
1 parent ebfd6df commit c1024a4

23 files changed

+606
-435
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,3 +32,6 @@ creds.json
3232
temp_test_run_data.json
3333
**/llm_tests_output.txt
3434
**/error_log.txt
35+
36+
# backend
37+
faiss_db

backend/.env.example

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,4 +35,7 @@ TOKENIZERS_PARALLELISM=false
3535
LOGLEVEL="INFO"
3636

3737
BACKEND_WORKERS=4
38-
BACKEND_URL="0.0.0.0"
38+
BACKEND_URL="0.0.0.0"
39+
40+
# Set FAST_MODE=true for fast prototyping
41+
FAST_MODE=false

backend/src/agents/retriever_graph.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import os
22
import logging
3-
from typing import TypedDict, Annotated, Union, Optional
3+
from typing import Any, TypedDict, Annotated, Union, Optional
44

55
from langchain_core.messages import AnyMessage
66
from langchain_core.output_parsers import JsonOutputParser
@@ -29,6 +29,7 @@
2929
class AgentState(TypedDict):
3030
messages: Annotated[list[AnyMessage], add_messages]
3131
context: Annotated[list[AnyMessage], add_messages]
32+
context_list: Annotated[list[str], add_messages]
3233
tools: list[str]
3334
sources: Annotated[list[str], add_messages]
3435
urls: Annotated[list[str], add_messages]
@@ -39,12 +40,12 @@ class ToolNode:
3940
def __init__(self, tool_fn: BaseTool) -> None:
4041
self.tool_fn = tool_fn
4142

42-
def get_node(self, state: AgentState) -> dict[str, list[str]]:
43+
def get_node(self, state: AgentState) -> dict[str, Any]:
4344
query = state["messages"][-1].content
4445
if query is None:
4546
raise ValueError("Query is None")
4647

47-
response, sources, urls = self.tool_fn.invoke(query) # type: ignore
48+
response, sources, urls, context_list = self.tool_fn.invoke(query) # type: ignore
4849

4950
if response != []:
5051
response = (
@@ -64,7 +65,12 @@ def get_node(self, state: AgentState) -> dict[str, list[str]]:
6465
if isinstance(urls[0], list)
6566
else urls
6667
)
67-
return {"context": response, "sources": sources, "urls": urls}
68+
return {
69+
"context": response,
70+
"sources": sources,
71+
"urls": urls,
72+
"context_list": context_list,
73+
}
6874

6975

7076
class RetrieverGraph:
@@ -75,13 +81,15 @@ def __init__(
7581
reranking_model_name: str,
7682
inbuilt_tool_calling: bool,
7783
use_cuda: bool = False,
84+
fast_mode: bool = False,
7885
):
7986
self.llm = llm_model
8087
self.retriever_tools: RetrieverTools = RetrieverTools()
8188
self.retriever_tools.initialize(
8289
embeddings_config=embeddings_config,
8390
reranking_model_name=reranking_model_name,
8491
use_cuda=use_cuda,
92+
fast_mode=fast_mode,
8593
)
8694

8795
self.tools = [
@@ -178,9 +186,10 @@ def agent(self, state: AgentState) -> dict[str, list[str]]:
178186

179187
return {"tools": tool_calls}
180188

181-
def generate(self, state: AgentState) -> dict[str, list[AnyMessage]]:
189+
def generate(self, state: AgentState) -> dict[str, Any]:
182190
query = state["messages"][-1].content
183191
context = state["context"][-1].content
192+
print("state keys", state.keys())
184193

185194
ans = self.llm_chain.invoke({"context": context, "question": query})
186195

backend/src/agents/retriever_tools.py

Lines changed: 70 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -44,21 +44,58 @@ def initialize(
4444
embeddings_config: dict[str, str],
4545
reranking_model_name: str,
4646
use_cuda: bool = False,
47+
fast_mode: bool = False,
4748
) -> None:
48-
general_retriever_chain = HybridRetrieverChain(
49-
embeddings_config=embeddings_config,
50-
reranking_model_name=reranking_model_name,
51-
use_cuda=use_cuda,
52-
html_docs_path=["./data/html/or_website/"],
53-
markdown_docs_path=[
49+
markdown_docs_map = {
50+
"general": [
5451
"./data/markdown/OR_docs",
5552
"./data/markdown/ORFS_docs",
5653
"./data/markdown/gh_discussions",
5754
"./data/markdown/manpages/man1",
5855
"./data/markdown/manpages/man2",
5956
"./data/markdown/OpenSTA_docs",
6057
],
61-
other_docs_path=["./data/pdf"],
58+
"install": [
59+
"./data/markdown/ORFS_docs/installation",
60+
"./data/markdown/OR_docs/installation",
61+
"./data/markdown/gh_discussions/Build",
62+
"./data/markdown/gh_discussions/Installation",
63+
"./data/markdown/OpenSTA_docs",
64+
],
65+
"commands": [
66+
"./data/markdown/OR_docs/tools",
67+
"./data/markdown/ORFS_docs/general",
68+
"./data/markdown/gh_discussions/Query",
69+
"./data/markdown/gh_discussions/Runtime",
70+
"./data/markdown/gh_discussions/Documentation",
71+
"./data/markdown/manpages/man1",
72+
"./data/markdown/manpages/man2",
73+
"./data/markdown/OpenSTA_docs",
74+
],
75+
"errinfo": [
76+
"./data/markdown/manpages/man3",
77+
"./data/markdown/gh_discussions/Bug",
78+
],
79+
}
80+
fastmode_docs_map = {
81+
"general": [markdown_docs_map["general"][0]],
82+
"install": [markdown_docs_map["install"][0]],
83+
"commands": [markdown_docs_map["commands"][0]],
84+
"errinfo": [markdown_docs_map["errinfo"][1]],
85+
"yosys": [
86+
"./data/html/yosys_docs/yosyshq.readthedocs.io/projects/yosys/en/latest/getting_started"
87+
],
88+
"klayout": ["./data/html/klayout_docs/www.klayout.de/examples"],
89+
}
90+
general_retriever_chain = HybridRetrieverChain(
91+
embeddings_config=embeddings_config,
92+
reranking_model_name=reranking_model_name,
93+
use_cuda=use_cuda,
94+
html_docs_path=[] if fast_mode else ["./data/html/or_website/"],
95+
markdown_docs_path=fastmode_docs_map["general"]
96+
if fast_mode
97+
else markdown_docs_map["general"],
98+
other_docs_path=[] if fast_mode else ["./data/pdf"],
6299
weights=[0.6, 0.2, 0.2],
63100
contextual_rerank=True,
64101
search_k=search_k,
@@ -71,13 +108,9 @@ def initialize(
71108
embeddings_config=embeddings_config,
72109
reranking_model_name=reranking_model_name,
73110
use_cuda=use_cuda,
74-
markdown_docs_path=[
75-
"./data/markdown/ORFS_docs/installation",
76-
"./data/markdown/OR_docs/installation",
77-
"./data/markdown/gh_discussions/Build",
78-
"./data/markdown/gh_discussions/Installation",
79-
"./data/markdown/OpenSTA_docs/",
80-
],
111+
markdown_docs_path=fastmode_docs_map["install"]
112+
if fast_mode
113+
else markdown_docs_map["install"],
81114
weights=[0.6, 0.2, 0.2],
82115
contextual_rerank=True,
83116
search_k=search_k,
@@ -90,17 +123,10 @@ def initialize(
90123
embeddings_config=embeddings_config,
91124
reranking_model_name=reranking_model_name,
92125
use_cuda=use_cuda,
93-
markdown_docs_path=[
94-
"./data/markdown/OR_docs/tools",
95-
"./data/markdown/ORFS_docs/general",
96-
"./data/markdown/gh_discussions/Query",
97-
"./data/markdown/gh_discussions/Runtime",
98-
"./data/markdown/gh_discussions/Documentation",
99-
"./data/markdown/manpages/man1",
100-
"./data/markdown/manpages/man2",
101-
"./data/markdown/OpenSTA_docs",
102-
],
103-
other_docs_path=["./data/pdf"],
126+
markdown_docs_path=fastmode_docs_map["commands"]
127+
if fast_mode
128+
else markdown_docs_map["commands"],
129+
other_docs_path=[] if fast_mode else ["./data/pdf"],
104130
weights=[0.6, 0.2, 0.2],
105131
contextual_rerank=True,
106132
search_k=search_k,
@@ -113,7 +139,9 @@ def initialize(
113139
embeddings_config=embeddings_config,
114140
reranking_model_name=reranking_model_name,
115141
use_cuda=use_cuda,
116-
html_docs_path=["./data/html/yosys_docs"],
142+
html_docs_path=fastmode_docs_map["yosys"]
143+
if fast_mode
144+
else ["./data/html/yosys_docs"],
117145
weights=[0.6, 0.2, 0.2],
118146
contextual_rerank=True,
119147
search_k=search_k,
@@ -126,7 +154,9 @@ def initialize(
126154
embeddings_config=embeddings_config,
127155
reranking_model_name=reranking_model_name,
128156
use_cuda=use_cuda,
129-
html_docs_path=["./data/html/klayout_docs"],
157+
html_docs_path=fastmode_docs_map["klayout"]
158+
if fast_mode
159+
else ["./data/html/klayout_docs"],
130160
weights=[0.6, 0.2, 0.2],
131161
contextual_rerank=True,
132162
search_k=search_k,
@@ -139,10 +169,9 @@ def initialize(
139169
embeddings_config=embeddings_config,
140170
reranking_model_name=reranking_model_name,
141171
use_cuda=use_cuda,
142-
markdown_docs_path=[
143-
"./data/markdown/manpages/man3",
144-
"./data/markdown/gh_discussions/Bug",
145-
],
172+
markdown_docs_path=fastmode_docs_map["errinfo"]
173+
if fast_mode
174+
else markdown_docs_map["errinfo"],
146175
weights=[0.6, 0.2, 0.2],
147176
contextual_rerank=True,
148177
search_k=search_k,
@@ -153,7 +182,7 @@ def initialize(
153182

154183
@staticmethod
155184
@tool
156-
def retrieve_general(query: str) -> Tuple[str, list[str], list[str]]:
185+
def retrieve_general(query: str) -> Tuple[str, list[str], list[str], list[str]]:
157186
"""
158187
Retrieve comprehensive and detailed information pertaining to the OpenROAD project, OpenROAD-Flow-Scripts and OpenSTA.\
159188
This includes, but is not limited to, general information, specific functionalities, usage guidelines,\
@@ -168,7 +197,7 @@ def retrieve_general(query: str) -> Tuple[str, list[str], list[str]]:
168197

169198
@staticmethod
170199
@tool
171-
def retrieve_cmds(query: str) -> Tuple[str, list[str], list[str]]:
200+
def retrieve_cmds(query: str) -> Tuple[str, list[str], list[str], list[str]]:
172201
"""
173202
Retrieve information on the commands available in OpenROAD, OpenROAD-Flow-Scripts and OpenSTA.\
174203
This includes usage guidelines, command syntax, examples, and best practices about commands that cover various \
@@ -197,7 +226,7 @@ def retrieve_cmds(query: str) -> Tuple[str, list[str], list[str]]:
197226

198227
@staticmethod
199228
@tool
200-
def retrieve_install(query: str) -> Tuple[str, list[str], list[str]]:
229+
def retrieve_install(query: str) -> Tuple[str, list[str], list[str], list[str]]:
201230
"""
202231
Retrieve comprehensive and detailed information pertaining to the installaion of OpenROAD, OpenROAD-Flow-Scripts and OpenSTA.\
203232
This includes, but is not limited to, various dependencies, system requirements, installation methods such as,\
@@ -213,7 +242,7 @@ def retrieve_install(query: str) -> Tuple[str, list[str], list[str]]:
213242

214243
@staticmethod
215244
@tool
216-
def retrieve_errinfo(query: str) -> Tuple[str, list[str], list[str]]:
245+
def retrieve_errinfo(query: str) -> Tuple[str, list[str], list[str], list[str]]:
217246
"""
218247
Retrieve descriptions and details regarding the various warning/error messages encountered while using the OpenROAD.\
219248
An error code usually is identified by the tool, followed by a number.\
@@ -228,7 +257,9 @@ def retrieve_errinfo(query: str) -> Tuple[str, list[str], list[str]]:
228257

229258
@staticmethod
230259
@tool
231-
def retrieve_yosys_rtdocs(query: str) -> Tuple[str, list[str], list[str]]:
260+
def retrieve_yosys_rtdocs(
261+
query: str,
262+
) -> Tuple[str, list[str], list[str], list[str]]:
232263
"""
233264
Retrieve detailed information regarding the Yosys application.\
234265
This tool provides information pertaining to the installation, usage, and troubleshooting of Yosys.\
@@ -248,7 +279,9 @@ def retrieve_yosys_rtdocs(query: str) -> Tuple[str, list[str], list[str]]:
248279

249280
@staticmethod
250281
@tool
251-
def retrieve_klayout_docs(query: str) -> Tuple[str, list[str], list[str]]:
282+
def retrieve_klayout_docs(
283+
query: str,
284+
) -> Tuple[str, list[str], list[str], list[str]]:
252285
"""
253286
Retrieve detailed information regarding the KLayout application.\
254287
This tool provides information pertaining to the installation, usage, and troubleshooting of KLayout.\

backend/src/api/models/response_model.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,14 @@ class UserInput(BaseModel):
88
list_context: bool = False
99

1010

11+
class ContextSource(BaseModel):
12+
source: str = ""
13+
context: str = ""
14+
15+
1116
class ChatResponse(BaseModel):
1217
response: str
13-
sources: list[str] = []
14-
context: list[str] = []
18+
context_sources: list[ContextSource] = []
1519
tools: list[str] = []
1620

1721

0 commit comments

Comments
 (0)