Skip to content

Commit e96fbf8

Browse files
Chat Document filter (#514)
* Added chat document filter * sending selected filenames for chat response --------- Co-authored-by: vasanthasaikalluri <[email protected]>
1 parent f6fcef1 commit e96fbf8

File tree

8 files changed

+50
-32
lines changed

8 files changed

+50
-32
lines changed

backend/score.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,13 +282,13 @@ async def post_processing(uri=Form(None), userName=Form(None), password=Form(Non
282282
close_db_connection(graph, 'post_processing')
283283

284284
@app.post("/chat_bot")
285-
async def chat_bot(uri=Form(None),model=Form(None),userName=Form(None), password=Form(None), database=Form(None),question=Form(None), session_id=Form(None),mode=Form(None)):
285+
async def chat_bot(uri=Form(None),model=Form(None),userName=Form(None), password=Form(None), database=Form(None),question=Form(None), document_names=Form(None),session_id=Form(None),mode=Form(None)):
286286
logging.info(f"QA_RAG called at {datetime.now()}")
287287
qa_rag_start_time = time.time()
288288
try:
289289
# database = "neo4j"
290290
graph = create_graph_database_connection(uri, userName, password, database)
291-
result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,session_id=session_id,mode=mode)
291+
result = await asyncio.to_thread(QA_RAG,graph=graph,model=model,question=question,document_names=document_names,session_id=session_id,mode=mode)
292292

293293
total_call_time = time.time() - qa_rag_start_time
294294
logging.info(f"Total Response time is {total_call_time:.2f} seconds")

backend/src/QA_integration_new.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@
2121
from langchain_core.messages import HumanMessage,AIMessage
2222
from src.shared.constants import *
2323
from src.llm import get_llm
24+
import json
2425

2526
load_dotenv()
2627

2728
EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL')
2829
EMBEDDING_FUNCTION , _ = load_embedding_model(EMBEDDING_MODEL)
2930

3031

31-
def get_neo4j_retriever(graph, retrieval_query,index_name="vector", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD):
32+
def get_neo4j_retriever(graph, retrieval_query,document_names,index_name="vector", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD):
3233
try:
3334
neo_db = Neo4jVector.from_existing_index(
3435
embedding=EMBEDDING_FUNCTION,
@@ -37,8 +38,13 @@ def get_neo4j_retriever(graph, retrieval_query,index_name="vector", search_k=CHA
3738
graph=graph
3839
)
3940
logging.info(f"Successfully retrieved Neo4jVector index '{index_name}'")
40-
retriever = neo_db.as_retriever(search_kwargs={'k': search_k, "score_threshold": score_threshold})
41-
logging.info(f"Successfully created retriever for index '{index_name}' with search_k={search_k}, score_threshold={score_threshold}")
41+
if document_names:
42+
document_names= list(map(str.strip, json.loads(document_names)))
43+
retriever = neo_db.as_retriever(search_kwargs={'k': search_k, "score_threshold": score_threshold,'filter':{'fileName': {'$in': document_names}}})
44+
logging.info(f"Successfully created retriever for index '{index_name}' with search_k={search_k}, score_threshold={score_threshold} for documents {document_names}")
45+
else:
46+
retriever = neo_db.as_retriever(search_kwargs={'k': search_k, "score_threshold": score_threshold})
47+
logging.info(f"Successfully created retriever for index '{index_name}' with search_k={search_k}, score_threshold={score_threshold}")
4248
return retriever
4349
except Exception as e:
4450
logging.error(f"Error retrieving Neo4jVector index '{index_name}' or creating retriever: {e}")
@@ -198,13 +204,13 @@ def clear_chat_history(graph,session_id):
198204
"user": "chatbot"
199205
}
200206

201-
def setup_chat(model, graph, session_id, retrieval_query):
207+
def setup_chat(model, graph, session_id, document_names,retrieval_query):
202208
start_time = time.time()
203209
if model in ["diffbot", "LLM_MODEL_CONFIG_ollama_llama3"]:
204210
model = "openai-gpt-4o"
205211
llm,model_name = get_llm(model)
206212
logging.info(f"Model called in chat {model} and model version is {model_name}")
207-
retriever = get_neo4j_retriever(graph=graph,retrieval_query=retrieval_query)
213+
retriever = get_neo4j_retriever(graph=graph,retrieval_query=retrieval_query,document_names=document_names)
208214
doc_retriever = create_document_retriever_chain(llm, retriever)
209215
history = create_neo4j_chat_message_history(graph, session_id)
210216
chat_setup_time = time.time() - start_time
@@ -244,7 +250,7 @@ def summarize_and_log(history, messages, llm):
244250
history_summarized_time = time.time() - start_time
245251
logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds")
246252

247-
def QA_RAG(graph, model, question, session_id, mode):
253+
def QA_RAG(graph, model, question, document_names,session_id, mode):
248254
try:
249255
logging.info(f"Chat Mode : {mode}")
250256
if mode == "vector":
@@ -259,7 +265,7 @@ def QA_RAG(graph, model, question, session_id, mode):
259265
else:
260266
retrieval_query = VECTOR_GRAPH_SEARCH_QUERY
261267

262-
llm, doc_retriever, history, model_version = setup_chat(model, graph, session_id, retrieval_query)
268+
llm, doc_retriever, history, model_version = setup_chat(model, graph, session_id, document_names,retrieval_query)
263269
messages = history.messages
264270
user_question = HumanMessage(content=question)
265271
messages.append(user_question)

frontend/src/components/ChatBot/Chatbot.tsx

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ const Chatbot: React.FC<ChatbotProps> = (props) => {
2525
const [inputMessage, setInputMessage] = useState('');
2626
const [loading, setLoading] = useState<boolean>(isLoading);
2727
const { userCredentials } = useCredentials();
28-
const { model, chatMode } = useFileContext();
28+
const { model, chatMode, selectedRows } = useFileContext();
2929
const messagesEndRef = useRef<HTMLDivElement>(null);
3030
const [sessionId, setSessionId] = useState<string>(sessionStorage.getItem('session_id') ?? '');
3131
const [showInfoModal, setShowInfoModal] = useState<boolean>(false);
@@ -44,6 +44,8 @@ const Chatbot: React.FC<ChatbotProps> = (props) => {
4444
},
4545
});
4646

47+
const selectedFileNames = selectedRows.map((str) => JSON.parse(str).name);
48+
4749
const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
4850
setInputMessage(e.target.value);
4951
};
@@ -147,9 +149,15 @@ const Chatbot: React.FC<ChatbotProps> = (props) => {
147149
try {
148150
setInputMessage('');
149151
simulateTypingEffect({ reply: ' ' });
150-
const chatbotAPI = await chatBotAPI(userCredentials as UserCredentials, inputMessage, sessionId, model, chatMode);
152+
const chatbotAPI = await chatBotAPI(
153+
userCredentials as UserCredentials,
154+
inputMessage,
155+
sessionId,
156+
model,
157+
chatMode,
158+
selectedFileNames
159+
);
151160
const chatresponse = chatbotAPI?.response;
152-
console.log('api', chatresponse);
153161
chatbotReply = chatresponse?.data?.data?.message;
154162
chatSources = chatresponse?.data?.data?.info.sources;
155163
chatModel = chatresponse?.data?.data?.info.model;

frontend/src/components/Dropdown.tsx

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,11 @@ const DropdownComponent: React.FC<ReusableDropdownProps> = ({
3737
const label =
3838
typeof option === 'string'
3939
? (option.includes('LLM_MODEL_CONFIG_')
40-
? capitalize(option.split('LLM_MODEL_CONFIG_').at(-1) as string)
41-
: capitalize(option)).split('_').join(' ')
40+
? capitalize(option.split('LLM_MODEL_CONFIG_').at(-1) as string)
41+
: capitalize(option)
42+
)
43+
.split('_')
44+
.join(' ')
4245
: capitalize(option.label);
4346
const value = typeof option === 'string' ? option : option.value;
4447
return {

frontend/src/components/FileTable.tsx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,10 @@ const FileTable: React.FC<FileTableProps> = ({ isExpanded, connectionStatus, set
287287
<i>
288288
{(model.includes('LLM_MODEL_CONFIG_')
289289
? capitalize(model.split('LLM_MODEL_CONFIG_').at(-1) as string)
290-
: capitalize(model)).split("_").join(" ")}
290+
: capitalize(model)
291+
)
292+
.split('_')
293+
.join(' ')}
291294
</i>
292295
);
293296
},

frontend/src/components/Graph/GraphViewModal.tsx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ const GraphViewModal: React.FunctionComponent<GraphViewModalProps> = ({
233233
if (allNodes.length > 0 && allRelationships.length > 0) {
234234
const { filteredNodes, filteredRelations, filteredScheme } = filterData(
235235
graphType,
236-
finalNodes,
237-
finalRels,
236+
finalNodes ?? [],
237+
finalRels ?? [],
238238
schemeVal
239239
);
240240
setNodes(filteredNodes);
@@ -302,6 +302,10 @@ const GraphViewModal: React.FunctionComponent<GraphViewModalProps> = ({
302302
<div className='my-40 flex items-center justify-center'>
303303
<Banner name='graph banner' description={statusMessage} type={status} />
304304
</div>
305+
) : nodes.length === 0 || relationships.length === 0 ? (
306+
<div className='my-40 flex items-center justify-center'>
307+
<Banner name='graph banner' description='No Entities Found' type='danger' />
308+
</div>
305309
) : (
306310
<>
307311
<div className='flex' style={{ height: '100%' }}>

frontend/src/services/QnaAPI.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ export const chatBotAPI = async (
77
question: string,
88
session_id: string,
99
model: string,
10-
mode = 'vector'
10+
mode: string,
11+
document_names: string[]
1112
) => {
1213
try {
1314
const formData = new FormData();
@@ -19,6 +20,7 @@ export const chatBotAPI = async (
1920
formData.append('session_id', session_id);
2021
formData.append('model', model);
2122
formData.append('mode', mode);
23+
formData.append('document_names', JSON.stringify(document_names));
2224
const startTime = Date.now();
2325
const response = await axios.post(`${url()}/chat_bot`, formData, {
2426
headers: {

frontend/src/utils/Utils.ts

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -183,11 +183,10 @@ export const filterData = (
183183
} else if (!graphType.includes('Document') && graphType.includes('Entities') && !graphType.includes('Chunk')) {
184184
// Only Entity
185185
// @ts-ignore
186-
filteredNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk'));
186+
const entityNode = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk'));
187+
filteredNodes = entityNode ? entityNode : [];
187188
// @ts-ignore
188-
filteredRelations = allRelationships.filter(
189-
(rel) => !['PART_OF', 'FIRST_CHUNK', 'HAS_ENTITY', 'SIMILAR', 'NEXT_CHUNK'].includes(rel.caption)
190-
);
189+
filteredRelations = allRelationships.filter((rel) => !['PART_OF', 'FIRST_CHUNK', 'HAS_ENTITY', 'SIMILAR', 'NEXT_CHUNK'].includes(rel.caption));
191190
filteredScheme = Object.fromEntries(entityTypes.map((key) => [key, scheme[key]])) as Scheme;
192191
} else if (!graphType.includes('Document') && !graphType.includes('Entities') && graphType.includes('Chunk')) {
193192
// Only Chunk
@@ -199,22 +198,15 @@ export const filterData = (
199198
} else if (graphType.includes('Document') && graphType.includes('Entities') && !graphType.includes('Chunk')) {
200199
// Document + Entity
201200
// @ts-ignore
202-
filteredNodes = allNodes.filter(
203-
(node) =>
204-
node.labels.includes('Document') || (!node.labels.includes('Document') && !node.labels.includes('Chunk'))
205-
);
201+
filteredNodes = allNodes.filter((node) =>node.labels.includes('Document') || (!node.labels.includes('Document') && !node.labels.includes('Chunk')));
206202
// @ts-ignore
207-
filteredRelations = allRelationships.filter(
208-
(rel) => !['PART_OF', 'FIRST_CHUNK', 'HAS_ENTITY', 'SIMILAR', 'NEXT_CHUNK'].includes(rel.caption)
209-
);
203+
filteredRelations = allRelationships.filter((rel) => !['PART_OF', 'FIRST_CHUNK', 'HAS_ENTITY', 'SIMILAR', 'NEXT_CHUNK'].includes(rel.caption));
210204
} else if (graphType.includes('Document') && !graphType.includes('Entities') && graphType.includes('Chunk')) {
211205
// Document + Chunk
212206
// @ts-ignore
213207
filteredNodes = allNodes.filter((node) => node.labels.includes('Document') || node.labels.includes('Chunk'));
214208
// @ts-ignore
215-
filteredRelations = allRelationships.filter((rel) =>
216-
['PART_OF', 'FIRST_CHUNK', 'SIMILAR', 'NEXT_CHUNK'].includes(rel.caption)
217-
);
209+
filteredRelations = allRelationships.filter((rel) =>['PART_OF', 'FIRST_CHUNK', 'SIMILAR', 'NEXT_CHUNK'].includes(rel.caption));
218210
filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk };
219211
} else if (!graphType.includes('Document') && graphType.includes('Entities') && graphType.includes('Chunk')) {
220212
// Chunk + Entity

0 commit comments

Comments (0)