diff --git a/README.md b/README.md index 01342a8a4..8d05f10fa 100644 --- a/README.md +++ b/README.md @@ -153,9 +153,35 @@ Allow unauthenticated request : Yes | GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. If set to False, will save the files locally | | ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database | | LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | +| RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | - +## For local llms (Ollama) +1. Pull the docker imgage of ollama +```bash +docker pull ollama/ollama +``` +2. Run the ollama docker image +```bash +docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama +``` +3. Execute any llm model ex🦙3 +```bash +docker exec -it ollama ollama run llama3 +``` +4. Configure env variable in docker compose or backend enviournment. +```env +LLM_MODEL_CONFIG_ollama_ +#example +LLM_MODEL_CONFIG_ollama_llama3=${LLM_MODEL_CONFIG_ollama_llama3-llama3, +http://host.docker.internal:11434} +``` +5. Configure the backend API url +```env +VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-backendurl} +``` +6. Open the application in browser and select the ollama model for the extraction. +7. Enjoy Graph Building. ## Usage diff --git a/backend/example.env b/backend/example.env index 75b817220..7fa3cb480 100644 --- a/backend/example.env +++ b/backend/example.env @@ -2,6 +2,7 @@ OPENAI_API_KEY = "" DIFFBOT_API_KEY = "" GROQ_API_KEY = "" EMBEDDING_MODEL = "all-MiniLM-L6-v2" +RAGAS_EMBEDDING_MODEL = "openai" IS_EMBEDDING = "true" KNN_MIN_SCORE = "0.94" # Enable Gemini (default is False) | Can be False or True @@ -40,4 +41,5 @@ LLM_MODEL_CONFIG_anthropic_claude_3_5_sonnet="model_name,anthropic_api_key" LLM_MODEL_CONFIG_fireworks_llama_v3_70b="model_name,fireworks_api_key" LLM_MODEL_CONFIG_bedrock_claude_3_5_sonnet="model_name,aws_access_key_id,aws_secret__access_key,region_name" LLM_MODEL_CONFIG_ollama_llama3="model_name,model_local_url" +YOUTUBE_TRANSCRIPT_PROXY="https://user:pass@domain:port" diff --git a/backend/score.py b/backend/score.py index 46c4ba137..f7b8c4082 100644 --- a/backend/score.py +++ b/backend/score.py @@ -190,22 +190,22 @@ async def extract_knowledge_graph_from_file( if source_type == 'local file': merged_file_path = os.path.join(MERGED_DIR,file_name) logging.info(f'File path:{merged_file_path}') - result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 's3 bucket' and source_url: - result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'web-url': - result = await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result 
= await extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'youtube' and source_url: - result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'Wikipedia' and wiki_query: - result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, language, file_name, allowedNodes, allowedRelationship, retry_condition) elif source_type == 'gcs bucket' and gcs_bucket_name: - result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition) + uri_latency, result = await extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, file_name, allowedNodes, allowedRelationship, retry_condition) else: return create_api_response('Failed',message='source_type is other than accepted source') extract_api_time = time.time() - start_time @@ -218,12 +218,13 @@ async def extract_knowledge_graph_from_file( result['logging_time'] = formatted_time(datetime.now(timezone.utc)) result['elapsed_api_time'] = f'{extract_api_time:.2f}' logger.log_struct(result, "INFO") + result.update(uri_latency) logging.info(f"extraction completed in {extract_api_time:.2f} seconds for file name {file_name}") return create_api_response('Success', data=result, file_source= source_type) except Exception as e: message=f"Failed To Process File:{file_name} or LLM Unable To Parse Content " error_message = str(e) - graphDb_data_Access.update_exception_db(file_name,error_message) + graphDb_data_Access.update_exception_db(file_name,error_message, retry_condition) gcs_file_cache = os.environ.get('GCS_FILE_CACHE') if source_type == 'local file': if gcs_file_cache == 'True': @@ -748,7 +749,7 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da gc.collect() @app.post("/drop_create_vector_index") -async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form()): +async def drop_create_vector_index(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form()): try: payload_json_obj = {'api_name':'drop_create_vector_index', 'db_url':uri, 'userName':userName, 'database':database, 'isVectorIndexExist':isVectorIndexExist, 'logging_time': formatted_time(datetime.now(timezone.utc))} diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py index 468069531..b7fcbd665 100644 --- a/backend/src/QA_integration.py +++ b/backend/src/QA_integration.py @@ -390,7 +390,7 @@ def get_neo4j_retriever(graph, document_names,chat_mode_settings, score_threshol try: neo_db = initialize_neo4j_vector(graph, chat_mode_settings) - document_names= list(map(str.strip, json.loads(document_names))) + # document_names= list(map(str.strip, 
json.loads(document_names))) search_k = chat_mode_settings["top_k"] retriever = create_retriever(neo_db, document_names,chat_mode_settings, search_k, score_threshold) return retriever @@ -434,10 +434,6 @@ def process_chat_response(messages, history, question, model, graph, document_na result = {"sources": list(), "nodedetails": list(), "entities": list()} total_tokens = 0 formatted_docs = "" - - # question = transformed_question if transformed_question else question - # metrics = get_ragas_metrics(question,formatted_docs,content) - # print(metrics) ai_response = AIMessage(content=content) messages.append(ai_response) @@ -580,7 +576,7 @@ def process_graph_response(model, graph, question, messages, history): summarization_thread = threading.Thread(target=summarize_and_log, args=(history, messages, qa_llm)) summarization_thread.start() logging.info("Summarization thread started.") - + metric_details = {"question":question,"contexts":graph_response.get("context", ""),"answer":ai_response_content} result = { "session_id": "", "message": ai_response_content, @@ -589,7 +585,8 @@ def process_graph_response(model, graph, question, messages, history): "cypher_query": graph_response.get("cypher_query", ""), "context": graph_response.get("context", ""), "mode": "graph", - "response_time": 0 + "response_time": 0, + "metric_details": metric_details, }, "user": "chatbot" } @@ -659,7 +656,25 @@ def QA_RAG(graph,model, question, document_names, session_id, mode, write_access result = process_graph_response(model, graph, question, messages, history) else: chat_mode_settings = get_chat_mode_settings(mode=mode) - result = process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings) + document_names= list(map(str.strip, json.loads(document_names))) + if document_names and not chat_mode_settings["document_filter"]: + result = { + "session_id": "", + "message": "This chat mode does support document selection", + "info": { + "sources": [], + "model": "", + "nodedetails": [], + "total_tokens": 0, + "response_time": 0, + "mode": chat_mode_settings["mode"], + "entities": [], + "metric_details": [], + }, + "user": "chatbot" + } + else: + result = process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings) result["session_id"] = session_id diff --git a/backend/src/document_sources/youtube.py b/backend/src/document_sources/youtube.py index fce62774d..e30de301e 100644 --- a/backend/src/document_sources/youtube.py +++ b/backend/src/document_sources/youtube.py @@ -17,22 +17,25 @@ def get_youtube_transcript(youtube_id): try: #transcript = YouTubeTranscriptApi.get_transcript(youtube_id) - transcript_list = YouTubeTranscriptApi.list_transcripts(youtube_id) - transcript = transcript_list.find_transcript(["en"]) - transcript_pieces: List[Dict[str, Any]] = transcript.fetch() + # transcript_list = YouTubeTranscriptApi.list_transcripts(youtube_id) + # transcript = transcript_list.find_transcript(["en"]) + # transcript_pieces: List[Dict[str, Any]] = transcript.fetch() + proxy = os.environ.get("YOUTUBE_TRANSCRIPT_PROXY") + proxies = { 'https': proxy } + transcript_pieces = YouTubeTranscriptApi.get_transcript(youtube_id, proxies = proxies) return transcript_pieces except Exception as e: message = f"Youtube transcript is not available for youtube Id: {youtube_id}" raise Exception(message) -def get_youtube_combined_transcript(youtube_id): - try: - transcript_dict = get_youtube_transcript(youtube_id) - transcript = 
YouTubeTranscriptApi.get_transcript(youtube_id) - return transcript - except Exception as e: - message = f"Youtube transcript is not available for youtube Id: {youtube_id}" - raise Exception(message) +# def get_youtube_combined_transcript(youtube_id): +# try: +# transcript_dict = get_youtube_transcript(youtube_id) +# transcript = YouTubeTranscriptApi.get_transcript(youtube_id) +# return transcript +# except Exception as e: +# message = f"Youtube transcript is not available for youtube Id: {youtube_id}" +# raise Exception(message) def get_youtube_combined_transcript(youtube_id): try: diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index e3b923f37..189d581a6 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -17,14 +17,19 @@ class graphDBdataAccess: def __init__(self, graph: Neo4jGraph): self.graph = graph - def update_exception_db(self, file_name, exp_msg): + def update_exception_db(self, file_name, exp_msg, retry_condition): try: job_status = "Failed" result = self.get_current_status_document_node(file_name) is_cancelled_status = result[0]['is_cancelled'] if bool(is_cancelled_status) == True: job_status = 'Cancelled' - self.graph.query("""MERGE(d:Document {fileName :$fName}) SET d.status = $status, d.errorMessage = $error_msg""", + if retry_condition is not None: + retry_condition = None + self.graph.query("""MERGE(d:Document {fileName :$fName}) SET d.status = $status, d.errorMessage = $error_msg, d.retry_condition = $retry_condition""", + {"fName":file_name, "status":job_status, "error_msg":exp_msg, "retry_condition":retry_condition}) + else : + self.graph.query("""MERGE(d:Document {fileName :$fName}) SET d.status = $status, d.errorMessage = $error_msg""", {"fName":file_name, "status":job_status, "error_msg":exp_msg}) except Exception as e: error_message = str(e) @@ -385,13 +390,14 @@ def get_duplicate_nodes_list(self): [s in similar | s {.id, .description, labels:labels(s), elementId: elementId(s)}] as similar, collect(distinct doc.fileName) as documents, count(distinct c) as chunkConnections ORDER BY e.id ASC + LIMIT 100 """ - return_query_duplicate_nodes_total = "RETURN COUNT(DISTINCT(n)) as total" + total_duplicate_nodes = "RETURN COUNT(DISTINCT(n)) as total" param = {"duplicate_score_value": score_value, "duplicate_text_distance" : text_distance} nodes_list = self.execute_query(query_duplicate_nodes.format(return_statement=return_query_duplicate_nodes),param=param) - total_nodes = self.execute_query(query_duplicate_nodes.format(return_statement=return_query_duplicate_nodes_total),param=param) + total_nodes = self.execute_query(query_duplicate_nodes.format(return_statement=total_duplicate_nodes),param=param) return nodes_list, total_nodes[0] def merge_duplicate_nodes(self,duplicate_nodes_list): diff --git a/backend/src/main.py b/backend/src/main.py index 425a24451..05f83f073 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -155,7 +155,7 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): # logging.warning("credntial file path not exist") video_id = parse_qs(urlparse(youtube_url).query).get('v') - print(f'Video Id Youtube: {video_id}') + # google_api_client = GoogleApiClient(service_account_path=Path(file_path)) # youtube_loader_channel = GoogleApiYoutubeLoader( # google_api_client=google_api_client, @@ -165,6 +165,7 @@ def create_source_node_graph_url_youtube(graph, model, source_url, source_type): # page_content = youtube_transcript[0].page_content 
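Reviewer note on the youtube.py change above: the transcript fetch now goes through an optional proxy taken from `YOUTUBE_TRANSCRIPT_PROXY` (see backend/example.env). Below is a minimal standalone sketch of that pattern, assuming the classic `youtube_transcript_api` interface where `get_transcript` accepts a requests-style `proxies` dict; the helper name, the text joining, and the sample video id are illustrative and not part of this change.

```python
import os
from youtube_transcript_api import YouTubeTranscriptApi

def fetch_transcript_text(youtube_id: str) -> str:
    # Optional proxy, e.g. YOUTUBE_TRANSCRIPT_PROXY="https://user:pass@domain:port"
    proxy = os.environ.get("YOUTUBE_TRANSCRIPT_PROXY")
    proxies = {"https": proxy} if proxy else None

    # Same call the patched get_youtube_transcript() makes; each returned piece
    # is a dict with 'text', 'start' and 'duration' keys.
    pieces = YouTubeTranscriptApi.get_transcript(youtube_id, proxies=proxies)
    return " ".join(piece["text"].strip() for piece in pieces)

if __name__ == "__main__":
    # Hypothetical video id, purely for illustration.
    print(fetch_transcript_text("dQw4w9WgXcQ")[:200])
```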
obj_source_node.file_name = match.group(1)#youtube_transcript[0].metadata["snippet"]["title"] + #obj_source_node.file_name = YouTube(youtube_url).title transcript= get_youtube_combined_transcript(match.group(1)) print(transcript) if transcript==None or len(transcript)==0: @@ -423,19 +424,20 @@ async def processing_source(uri, userName, password, database, model, file_name, uri_latency["Per_entity_latency"] = 'N/A' else: uri_latency["Per_entity_latency"] = f'{int(processing_source_func)/node_count}/s' - uri_latency["fileName"] = file_name - uri_latency["nodeCount"] = node_count - uri_latency["relationshipCount"] = rel_count - uri_latency["total_processing_time"] = round(processed_time.total_seconds(),2) - uri_latency["status"] = job_status - uri_latency["model"] = model - uri_latency["success_count"] = 1 + response = {} + response["fileName"] = file_name + response["nodeCount"] = node_count + response["relationshipCount"] = rel_count + response["total_processing_time"] = round(processed_time.total_seconds(),2) + response["status"] = job_status + response["model"] = model + response["success_count"] = 1 - return uri_latency + return uri_latency, response else: logging.info('File does not process because it\'s already in Processing status') else: - error_message = "Unable to get the status of docuemnt node." + error_message = "Unable to get the status of document node." logging.error(error_message) raise Exception(error_message) diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index cde354f16..b58fd3a67 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -560,12 +560,12 @@ CHAT_VECTOR_MODE = "vector" CHAT_FULLTEXT_MODE = "fulltext" -CHAT_ENTITY_VECTOR_MODE = "entity search+vector" -CHAT_VECTOR_GRAPH_MODE = "graph+vector" -CHAT_VECTOR_GRAPH_FULLTEXT_MODE = "graph+vector+fulltext" -CHAT_GLOBAL_VECTOR_FULLTEXT_MODE = "global search+vector+fulltext" +CHAT_ENTITY_VECTOR_MODE = "entity_vector" +CHAT_VECTOR_GRAPH_MODE = "graph_vector" +CHAT_VECTOR_GRAPH_FULLTEXT_MODE = "graph_vector_fulltext" +CHAT_GLOBAL_VECTOR_FULLTEXT_MODE = "global_vector" CHAT_GRAPH_MODE = "graph" -CHAT_DEFAULT_MODE = "graph+vector+fulltext" +CHAT_DEFAULT_MODE = "graph_vector_fulltext" CHAT_MODE_CONFIG_MAP= { CHAT_VECTOR_MODE : { diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index 0281c91f8..548e9706f 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -1,15 +1,19 @@ import json +import asyncio import os import shutil import logging import pandas as pd from datetime import datetime as dt from dotenv import load_dotenv -from score import * +# from score import * from src.main import * from src.QA_integration import QA_RAG from langserve import add_routes - +from src.ragas_eval import get_ragas_metrics +from datasets import Dataset +from ragas import evaluate +from ragas.metrics import answer_relevancy, context_utilization, faithfulness # Load environment variables if needed load_dotenv() import os @@ -57,11 +61,11 @@ def test_graph_from_file_local(model_name): create_source_node_local(graph, model_name, file_name) merged_file_path = os.path.join(MERGED_DIR, file_name) - local_file_result = extract_graph_from_file_local_file( - URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '',None - ) + local_file_result = asyncio.run(extract_graph_from_file_local_file( + URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '',None)) logging.info("Local file 
processing complete") print(local_file_result) + return local_file_result # try: # assert local_file_result['status'] == 'Completed' @@ -72,19 +76,19 @@ def test_graph_from_file_local(model_name): # print("Fail: ", e) # Delete the file after processing - delete_extracted_files(merged_file_path) +# delete_extracted_fiKles(merged_file_path) - return local_file_result + #return local_file_result def test_graph_from_wikipedia(model_name): # try: """Test graph creation from a Wikipedia page.""" - wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir' + wiki_query = 'https://en.wikipedia.org/wiki/Berkshire_Hathaway' source_type = 'Wikipedia' - file_name = "Ram_Mandir" + file_name = "Berkshire_Hathaway" create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type) - wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 'en',file_name, '', '',None) + wiki_result = asyncio.run(extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 'en',file_name, '', '',None)) logging.info("Wikipedia test done") print(wiki_result) try: @@ -107,7 +111,7 @@ def test_graph_website(model_name): file_name = [] create_source_node_graph_web_url(graph, model_name, source_url, source_type) - weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url,file_name, '', '',None) + weburl_result = asyncio.run(extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url,file_name, '', '',None)) logging.info("WebUrl test done") print(weburl_result) @@ -164,9 +168,9 @@ def disconected_nodes(): print(nodes_list[0]["e"]["elementId"]) status = "False" if total_nodes['total']>0: - status = "True" + status = "get_unconnected_nodes_list.. records loaded successfully" else: - status = "False" + status = "get_unconnected_nodes_list ..records not loaded" return nodes_list[0]["e"]["elementId"], status #Test Delete delete_disconnected_nodes list @@ -177,9 +181,9 @@ def delete_disconected_nodes(lst_element_id): result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id)) print(f'delete disconnect api result {result}') if not result: - return "True" + return "delete_unconnected_nodes..Succesfully deleted first index of disconnected nodes" else: - return "False" + return "delete_unconnected_nodes..Unable to delete Nodes" #Test Get Duplicate_nodes def get_duplicate_nodes(): @@ -187,28 +191,34 @@ def get_duplicate_nodes(): graphDb_data_Access = graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() if total_nodes['total']>0: - return "True" + return "Data successfully loaded" else: - return "False" + return "Unable to load data" #Test populate_graph_schema -def test_populate_graph_schema_from_text(model): - result_schema = populate_graph_schema_from_text('When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble.', model, True) - print(result_schema) - return result_schema - +def test_populate_graph_schema_from_text(model_name): + schema_text =('Amazon was founded on July 5, 1994, by Jeff Bezos in Bellevue, Washington.The company originally started as an online marketplace for books but gradually expanded its offerings to include a wide range of product categories. 
This diversification led to it being referred.') + #result_schema='' + try: + result_schema = populate_graph_schema_from_text(schema_text, model_name, True) + print(result_schema) + return result_schema + except Exception as e: + print("Failed to get schema from text", e) + return e def run_tests(): final_list = [] error_list = [] - models = ['openai-gpt-3.5','openai-gpt-4o','openai-gpt-4o-mini','gemini-1.5-pro','gemini 1.5 Flash','azure_ai_gpt_35','azure_ai_gpt_4o','ollama_llama3','ollama','groq_llama3_70b','anthropic_claude_3_5_sonnet','bedrock_claude_3_5_sonnet','fireworks_llama_v3p2_90b'] + + models = ['openai_gpt_3_5','openai_gpt_4o','openai_gpt_4o_mini','azure-ai-gpt-35','azure-ai-gpt-4o','gemini_1_5_pro','gemini_1_5_flash','anthropic-claude-3-5-sonnet','bedrock-claude-3-5-sonnet','groq-llama3-70b','fireworks-llama-v3-70b'] for model_name in models: try: final_list.append(test_graph_from_file_local(model_name)) final_list.append(test_graph_from_wikipedia(model_name)) - final_list.append(test_populate_graph_schema_from_text(model_name)) final_list.append(test_graph_website(model_name)) + final_list.append(test_populate_graph_schema_from_text(model_name)) final_list.append(test_graph_from_youtube_video(model_name)) final_list.append(test_chatbot_qna(model_name)) final_list.append(test_chatbot_qna(model_name, mode='vector')) @@ -219,23 +229,25 @@ def run_tests(): except Exception as e: error_list.append((model_name, str(e))) - # #Compare and log diffrences in graph results - # # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results - # test_populate_graph_schema_from_text('openai-gpt-4o') -# dis_elementid, dis_status = disconected_nodes() -# lst_element_id = [dis_elementid] -# delt = delete_disconected_nodes(lst_element_id) + +# test_populate_graph_schema_from_text('openai-gpt-4o') +#delete diconnected nodes + dis_elementid, dis_status = disconected_nodes() + lst_element_id = [dis_elementid] + delt = delete_disconected_nodes(lst_element_id) # dup = get_duplicate_nodes() print(final_list) - # schma = test_populate_graph_schema_from_text(model) + schma = test_populate_graph_schema_from_text(model_name) # Save final results to CSV df = pd.DataFrame(final_list) print(df) df['execution_date'] = dt.today().strftime('%Y-%m-%d') -# df['disconnected_nodes']=dis_status +#diconnected nodes + df['disconnected_nodes']=dis_status # df['get_duplicate_nodes']=dup -# df['delete_disconected_nodes']=delt - # df['test_populate_graph_schema_from_text'] = schma + + df['delete_disconected_nodes']=delt + df['test_populate_graph_schema_from_text'] = schma df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) # Save error details to CSV diff --git a/docker-compose.yml b/docker-compose.yml index 7761704c7..ea6d2c050 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -52,16 +52,17 @@ services: dockerfile: Dockerfile args: - VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-http://localhost:8000} - - VITE_REACT_APP_SOURCES=${VITE_REACT_APP_SOURCES-local,youtube,wiki,s3} - - VITE_LLM_MODELS=${VITE_LLM_MODELS-diffbot,openai-gpt-3.5,openai-gpt-4o} - - VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID-""} + - VITE_REACT_APP_SOURCES=${VITE_REACT_APP_SOURCES-local,wiki,s3} + - VITE_LLM_MODELS=${VITE_LLM_MODELS-} + - VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID-} - 
VITE_BLOOM_URL=${VITE_BLOOM_URL-https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true} - VITE_TIME_PER_PAGE=${VITE_TIME_PER_PAGE-50} - VITE_CHUNK_SIZE=${VITE_CHUNK_SIZE-5242880} - VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE-5242880} - VITE_ENV=${VITE_ENV-DEV} - - VITE_CHAT_MODES=${VITE_CHAT_MODES-""} + - VITE_CHAT_MODES=${VITE_CHAT_MODES-} - VITE_BATCH_SIZE=${VITE_BATCH_SIZE-2} + - VITE_LLM_MODELS_PROD=${VITE_LLM_MODELS_PROD-openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash} volumes: - ./frontend:/app - /app/node_modules diff --git a/docs/backend/backend_docs.adoc b/docs/backend/backend_docs.adoc index 91b39260c..2591ac47e 100644 --- a/docs/backend/backend_docs.adoc +++ b/docs/backend/backend_docs.adoc @@ -22,9 +22,15 @@ Neo4j database connection on frontend is done with this API. [source,json,indent=0] ---- { - "status":"Success", - "message":"Connection Successful" -} + "status": "Success", + "data": { + "db_vector_dimension": 384, + "application_dimension": 384, + "message": "Connection Successful", + "gds_status": true, + "write_access": true, + "elapsed_api_time": "5.48" + } ---- @@ -361,10 +367,12 @@ The API responsible for a chatbot system designed to leverage multiple AI models **Components :** -** Embedding Models - Includes OpenAI Embeddings, VertexAI Embeddings, and SentenceTransformer Embeddings to support vector-based query operations. -** AI Models - OpenAI GPT 3.5, GPT 4o, Gemini Pro, Gemini 1.5 Pro and Groq llama3 can be configured for the chatbot backend to generate responses and process natural language. +** Embedding Models - Includes OpenAI Embeddings, VertexAI Embeddings, and SentenceTransformer Embeddings(Default) to support vector-based query operations. +** AI Models - OpenAI GPT 3.5, GPT 4o, GPT 40 mini, gemini_1.5_flash can be configured for the chatbot backend to generate responses and process natural language. ** Graph Database (Neo4jGraph) - Manages interactions with the Neo4j database, retrieving, and storing conversation histories. ** Response Generation - Utilizes Vector Embeddings from the Neo4j database, chat history, and the knowledge base of the LLM used. +** Chat Modes - Vector , Graph, Vector + Graph, Fulltext, Vector + Graph+Fulltext, Entity Search + Vector, Global search Vector + **API Parameters :** @@ -373,7 +381,9 @@ The API responsible for a chatbot system designed to leverage multiple AI models * `password`= Neo4j database password * `model`= LLM model * `question`= User query for the chatbot -* `session_id`= Session ID used to maintain the history of chats during the user's connection +* `session_id`= Session ID used to maintain the history of chats during the user's connection +* `mode` = chat mode to use +* `document_names` = the names of documents to be filtered works for vector mode and vector+Graph mode **Response :** [source,json,indent=0] @@ -381,26 +391,46 @@ The API responsible for a chatbot system designed to leverage multiple AI models { "status": "Success", "data": { - "session_id": "0901", - "message": "Fibrosis, also known as fibrotic scarring, is a pathological wound healing process where connective tissue replaces normal parenchymal tissue." 
+ + "session_id": "0cbd04a8-abc3-4776-b393-6a9a2cea36b3", + "message": "response generated by the chat", "info": { "sources": [ - { - "source_name": "https://en.wikipedia.org/wiki/Fibrosis", - "page_numbers": [], - "start_time": [] - } + "About Amazon.pdf" ], - "model": "gpt-4o", - "chunkids": [ - "54d8c0dbefb67f1ed3f6939d59267e1ff557a94c", - "4cc02ee8419706c8decdf71ab0d3896aad5c7dca", - "266ce95311bb1921791b4f1cd29a48d433027139", - "11e19513247e1e396475728fa6a197695045b248", - "8bafa01b6d851f70822bcb86863e485e1785a64c" - ], - "total_tokens": 2213, - "response_time": 10.17 + "model": "gpt-4o-2024-08-06", + "nodedetails": { + "chunkdetails": [ + { + "id": "73bc9c9170bcd807d2fa87d87a0eeb3d82f95160", + "score": 1.0 + }, + { + "id": "de5486776978353c9f8ac530bcff33eeecbdbbad", + "score": 0.9425 + } + ], + "entitydetails": [], + "communitydetails": [] + }, + "total_tokens": 4575, + "response_time": 17.19, + "mode": "graph_vector_fulltext", + "entities": { + "entityids": [ + "4:98e5e9bb-8095-440d-9462-03985fed2fa2:307", + "4:98e5e9bb-8095-440d-9462-03985fed2fa2:1877", + ], + "relationshipids": [ + "5:98e5e9bb-8095-440d-9462-03985fed2fa2:8072566611095062357", + "5:98e5e9bb-8095-440d-9462-03985fed2fa2:8072566508015847224" + ] + }, + "metric_details": { + "question": "tell me about amazon ", + "contexts": "context sent to LLM" + "answer": "response generated by the LLM" + } }, "user": "chatbot" } @@ -420,7 +450,8 @@ This API is used to get the entities and relations associated with a particular * `userName`= Neo4j db username, * `password`= Neo4j db password, * `database`= Neo4j database name -* `chunk_ids` = Chunk ids of document +* `nodedetails` = Node element id's to get information(chunks,entities,communities) +* `entities` = entities received from the retriver for graph based modes **Response :** @@ -431,42 +462,51 @@ This API is used to get the entities and relations associated with a particular "data": { "nodes": [ { - "element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73267", + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:307", "labels": [ - "Condition" + "Company" ], "properties": { - "id": "Fibrosis" + "id": "Amazon", + "description": "Initially an online bookstore, Amazon has transformed into a $48 billion retail giant, offering products in over forty categories, from books and electronics to groceries. Today, it operates as a logistics platform, a search engine, an Internet advertising platform, an e-commerce platform, and an IT platform." 
} - }, - + } ], "relationships": [ { - "element_id": "5:a69712a5-1102-40da-a96d-70c1143ea8e5:1153057844048764467", - "type": "AFFECTS", - "start_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73267", - "end_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73282" - }, - { - "element_id": "5:a69712a5-1102-40da-a96d-70c1143ea8e5:1155309643862449715", - "type": "AFFECTS", - "start_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73267", - "end_node_element_id": "4:a69712a5-1102-40da-a96d-70c1143ea8e5:73294" - }, + "element_id": "5:98e5e9bb-8095-440d-9462-03985fed2fa2:6917952339617775946", + "type": "OFFERS", + "start_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:307", + "end_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:330" + } ], "chunk_data": [ { - "id": "54d8c0dbefb67f1ed3f6939d59267e1ff557a94c", - "position": 1, - "text": "Fibrosis, also known as fibrotic scarring, is a pathological wound healing ...", - "content_offset": 0, - "fileName": "fibrosis", - "length": 1002, + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:14", + "id": "d1e92be81a0872d621242cee9fed69d14b0cd68d", + "position": 13, + "text": " 6 eBay, operating as the biggest online auction house and focusing as a service provider, employs cost leadership strategy by solely operating e-commerce as an intermediary without holding any inventories or physical infrastructures. It also applies a differentiation strategy by providing a ....", + "content_offset": 9886, + "fileName": "About Amazon.pdf", + "page_number": 7, + "length": 1024, + "fileSource": "local file", "embedding": null } + ], + "community_data": [ + { + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:1026", + "summary": "Google, led by CEO Sundar Pichai, is actively involved in various business and product initiatives.", + "id": "0-311", + "level": 0, + "weight": 7, + "embedding": null, + "community_rank": 1 + } ] - } + }, + "message": "Total elapsed API time 3.75" } .... @@ -524,6 +564,63 @@ This API is used to view graph for a particular file. } .... +=== Get neighbour nodes +---- +POST /get_neighbours +---- + +This API is used to retrive the neighbor nodes of the given element id of the node. + +**API Parameters :** + +* `uri`=Neo4j uri, +* `userName`= Neo4j db username, +* `password`= Neo4j db password, +* `database`= Neo4j database name, +* `elementId` = Element id of the node to retrive its neighbours + + +**Response :** +[source,json,indent=0] +.... +{ + "status": "Success", + "data": { + "nodes": [ + { + "summary": null, + "element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:3", + "id": "73bc9c9170bcd807d2fa87d87a0eeb3d82f95160", + "position": 2, + "text": null, + "content_offset": 186, + "labels": [ + "Chunk" + ], + "page_number": 2, + "fileName": "About Amazon.pdf", + "length": 904, + "properties": { + "id": "73bc9c9170bcd807d2fa87d87a0eeb3d82f95160" + }, + "embedding": null + } + ], + "relationships": [ + { + "element_id": "5:98e5e9bb-8095-440d-9462-03985fed2fa2:1175445000301838339", + "end_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:18", + "start_node_element_id": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:3", + "type": "HAS_ENTITY" + }, + ] + }, + "message": "Total elapsed API time 0.24" +} +.... + + + === Clear chat history ---- POST /clear_chat_bot @@ -699,7 +796,7 @@ The API is used to delete unconnected entities from database. .... 
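Reviewer note for anyone wiring a client against the endpoints documented in this file: the backend declares these endpoint parameters with FastAPI `Form()` (as seen for /drop_create_vector_index in score.py above), so requests should be form-encoded rather than JSON. A small sketch of calling `/get_neighbours` with the parameters listed in that section; the base URL, credentials and element id are placeholders, not values from this PR.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local backend; adjust to your deployment

payload = {
    "uri": "neo4j+s://<your-instance>.databases.neo4j.io",
    "userName": "neo4j",
    "password": "<password>",
    "database": "neo4j",
    # Element id of the node whose neighbours you want, e.g. taken from an
    # earlier /chunk_entities response.
    "elementId": "4:98e5e9bb-8095-440d-9462-03985fed2fa2:3",
}

# data= sends application/x-www-form-urlencoded, matching the Form() parameters.
response = requests.post(f"{BASE_URL}/get_neighbours", data=payload)
response.raise_for_status()
body = response.json()
print(body["message"])
print(body["data"]["nodes"])
```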
-== Decisions
+==== Decisions

* Process only 1st page of Wikipedia
* Split document content into chunks of size 200 and overlap of 20
* Chunk embedding creation & a vector index
** Embedding model
** minimum score for KNN graph
** Uploaded file storage location (GCS bucket or container)
+
+
+=== Get duplicate nodes
+----
+POST /get_duplicate_nodes
+----
+
+The API is used to fetch duplicate entities from the database.
+
+**API Parameters :**
+
+* `uri`=Neo4j uri,
+* `userName`= Neo4j db username,
+* `password`= Neo4j db password,
+* `database`= Neo4j database name,
+
+
+**Response :**
+[source,json,indent=0]
+....
+{
+    "status": "Success",
+    "data": [
+        {
+            "e": {
+                "id": "13 September 2024",
+                "elementId": "4:b104b2e7-e2ed-4902-b78b-7ad1518ca04f:14007",
+                "communities": [
+                    2969,
+                    383,
+                    81
+                ],
+                "labels": [
+                    "__Entity__",
+                    "Date"
+                ],
+                "embedding": null
+            },
+            "similar": [
+                {
+                    "id": "20 September 2024",
+                    "elementId": "4:b104b2e7-e2ed-4902-b78b-7ad1518ca04f:14153",
+                    "description": null,
+                    "labels": [
+                        "__Entity__",
+                        "Date"
+                    ]
+                }
+            ],
+            "documents": [],
+            "chunkConnections": 0
+        }
+    ],
+    "message": {
+        "total": 1
+    }
+}
+....
+
+
+=== Merge duplicate nodes
+----
+POST /merge_duplicate_nodes
+----
+
+The API is used to merge the duplicate entities selected by the user in the database.
+
+**API Parameters :**
+
+* `uri`=Neo4j uri,
+* `userName`= Neo4j db username,
+* `password`= Neo4j db password,
+* `database`= Neo4j database name,
+* `duplicate_nodes_list`= list of selected entities to merge with their similar entities.
+
+**Response :**
+[source,json,indent=0]
+....
+{
+    "status": "Success",
+    "data": [
+        {
+            "totalMerged": 2
+        }
+    ],
+    "message": "Duplicate entities merged successfully"
+}
+....
+=== Drop and create vector index
+----
+POST /drop_create_vector_index
+----
+
+The API is used to drop and re-create the vector index when the vector index dimensions are different.
+
+**API Parameters :**
+
+* `uri`=Neo4j uri,
+* `userName`= Neo4j db username,
+* `password`= Neo4j db password,
+* `database`= Neo4j database name,
+* `isVectorIndexExist`= True or False based on whether the vector index exists in the database,
+
+**Response :**
+[source,json,indent=0]
+....
+{
+    "status": "Success",
+    "message": "Drop and Re-Create vector index successfully"
+}
+....
+
+=== Reprocessing of sources
+----
+POST /retry_processing
+----
+
+This API is used to reprocess cancelled, completed or failed file sources.
+Users have 3 options to reprocess files:
+
+* Start from beginning - In this condition the file will be processed from the beginning, i.e. from the 1st chunk again.
+* Delete entities and start from beginning - If the file source has already been processed and has any existing nodes and relationships, those will be deleted and the file will be reprocessed from the 1st chunk.
+* Start from last processed position - Cancelled or failed files will be processed from the last successfully processed chunk position. This option is not available for completed files.
+
+Once the status is set to 'Reprocess', the user can click Generate graph again to process the file for knowledge graph creation.
+
+**API Parameters :**
+
+* `uri`=Neo4j uri,
+* `userName`= Neo4j db username,
+* `password`= Neo4j db password,
+* `database`= Neo4j database name,
+* `file_name`= Name of the file which the user wants to reprocess.
+* `retry_condition` = One of the above 3 conditions which is selected for reprocessing.
+
+
+**Response :**
+[source,json,indent=0]
+....
+{
+    "status": "Success",
+    "message": "Status set to Reprocess for filename : $filename"
+}
+....
+
+=== Evaluate response
+----
+POST /metric
+----
+
+This API is responsible for evaluating chatbot responses on the basis of different metrics such as faithfulness and answer relevancy. It utilises the RAGAS library to calculate these metrics.
+
+**API Parameters :**
+
+* `question`= User query for the chatbot
+* `context`= context retrieved by the retrieval mode used for answer generation
+* `answer`= answer generated by the chatbot
+* `model`= LLM model
+* `mode`= Retrieval mode used for answer generation
+
+**Response :**
+[source,json,indent=0]
+....
+{
+    "status": "Success",
+    "data": {
+        "graph+vector+fulltext": {
+            "faithfulness": 1.0,
+            "answer_relevancy": 0.9699
+        }
+    }
+}
+....
diff --git a/example.env b/example.env
index 227c51904..6b542daf1 100644
--- a/example.env
+++ b/example.env
@@ -31,3 +31,4 @@ VITE_CHUNK_SIZE=5242880
 VITE_GOOGLE_CLIENT_ID=""
 VITE_CHAT_MODES=""
 VITE_BATCH_SIZE=2
+VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash"
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index c3a7c1c82..3053e1ba9 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -12,6 +12,7 @@ ARG VITE_CHUNK_SIZE=5242880
 ARG VITE_CHAT_MODES=""
 ARG VITE_ENV="DEV"
 ARG VITE_BATCH_SIZE=2
+ARG VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash"
 WORKDIR /app
 COPY package.json yarn.lock ./
@@ -28,6 +29,7 @@ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \
     VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE} \
     VITE_CHAT_MODES=$VITE_CHAT_MODES \
     VITE_BATCH_SIZE=$VITE_BATCH_SIZE \
+    VITE_LLM_MODELS_PROD=$VITE_LLM_MODELS_PROD \
     yarn run build
 # Step 2: Serve the application using Nginx
diff --git a/frontend/example.env b/frontend/example.env
index 9bc5bc0d5..4063fbc37 100644
--- a/frontend/example.env
+++ b/frontend/example.env
@@ -9,3 +9,4 @@ VITE_LARGE_FILE_SIZE=5242880
 VITE_GOOGLE_CLIENT_ID=""
 VITE_CHAT_MODES=""
 VITE_BATCH_SIZE=2
+VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash"
diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx
index 757503fb1..4ffdd9acd 100644
--- a/frontend/src/components/ChatBot/ChatInfoModal.tsx
+++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx
@@ -24,7 +24,7 @@ import ChunkInfo from './ChunkInfo';
 import EntitiesInfo from './EntitiesInfo';
 import SourcesInfo from './SourcesInfo';
 import CommunitiesInfo from './CommunitiesInfo';
-import { chatModeLables, supportedLLmsForRagas } from '../../utils/Constants';
+import { chatModeLables, chatModeReadableLables, supportedLLmsForRagas } from '../../utils/Constants';
 import { Relationship } from '@neo4j-nvl/base';
 import { getChatMetrics } from '../../services/GetRagasMetric';
 import MetricsTab from './MetricsTab';
@@ -71,14 +71,20 @@ const ChatInfoModal: React.FC = ({
   const { breakpoints } = tokens;
   const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`);
   const [activeTab, setActiveTab] = useState(
-    error?.length ? 10 : mode === chatModeLables.global_vector ? 7 : mode === chatModeLables.graph ? 4 : 3
+    error?.length
+      ? 10
+      : mode === chatModeLables['global search+vector+fulltext']
+      ? 7
+      : mode === chatModeLables.graph
+      ?
4 + : 3 ); const { userCredentials } = useCredentials(); const themeUtils = useContext(ThemeWrapperContext); const [, copy] = useCopyToClipboard(); const [copiedText, setcopiedText] = useState(false); const [showMetricsTable, setShowMetricsTable] = useState(Boolean(metricDetails)); - const [showMultiModeMetrics, setShowMultiModeMetrics] = useState(Boolean(multiModelMetrics.length)) + const [showMultiModeMetrics, setShowMultiModeMetrics] = useState(Boolean(multiModelMetrics.length)); const [multiModeError, setMultiModeError] = useState(''); const actions: CypherCodeBlockProps['actions'] = useMemo( @@ -201,7 +207,7 @@ const ChatInfoModal: React.FC = ({ } } } else { - setShowMultiModeMetrics(true) + setShowMultiModeMetrics(true); toggleMetricsLoading(); const contextarray = Object.values(activeChatmodes).map((r) => { return r.metric_contexts; @@ -255,7 +261,7 @@ const ChatInfoModal: React.FC = ({ To generate this response, the process took {response_time} seconds, utilizing {total_tokens} tokens with the model{' '} {model} in{' '} - {mode !== 'vector' ? mode.replace(/\+/g, ' & ') : mode} mode. + {chatModeReadableLables[mode] !== 'vector' ? chatModeReadableLables[mode].replace(/\+/g, ' & ') : chatModeReadableLables[mode]} mode. @@ -263,16 +269,16 @@ const ChatInfoModal: React.FC = ({ {error} ) : ( - {mode === chatModeLables.global_vector ? ( + {mode === chatModeLables['global search+vector+fulltext'] ? ( Communities ) : ( <> {mode != chatModeLables.graph ? Sources used : <>} {mode != chatModeLables.graph ? Chunks : <>} - {mode === chatModeLables.graph_vector || + {mode === chatModeLables['graph+vector'] || mode === chatModeLables.graph || - mode === chatModeLables.graph_vector_fulltext || - mode === chatModeLables.entity_vector ? ( + mode === chatModeLables['graph+vector+fulltext'] || + mode === chatModeLables['entity search+vector'] ? ( Top Entities used ) : ( <> @@ -282,7 +288,7 @@ const ChatInfoModal: React.FC = ({ ) : ( <> )} - {mode === chatModeLables.entity_vector && communities.length ? ( + {mode === chatModeLables['entity search+vector'] && communities.length ? ( Communities ) : ( <> @@ -387,7 +393,7 @@ const ChatInfoModal: React.FC = ({ className='min-h-40' /> - {mode === chatModeLables.entity_vector || mode === chatModeLables.global_vector ? ( + {mode === chatModeLables['entity search+vector'] || mode === chatModeLables['global search+vector+fulltext'] ? 
( diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx index a892ca01d..52df01a4a 100644 --- a/frontend/src/components/ChatBot/ChatModeToggle.tsx +++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx @@ -2,7 +2,7 @@ import { StatusIndicator, Typography } from '@neo4j-ndl/react'; import { useMemo, useEffect } from 'react'; import { useFileContext } from '../../context/UsersFiles'; import CustomMenu from '../UI/Menu'; -import { chatModeLables, chatModes as AvailableModes } from '../../utils/Constants'; +import { chatModeLables, chatModes as AvailableModes, chatModeReadableLables } from '../../utils/Constants'; import { capitalize } from '@mui/material'; import { capitalizeWithPlus } from '../../utils/Utils'; import { useCredentials } from '../../context/UserCredentials'; @@ -30,16 +30,16 @@ export default function ChatModeToggle({ if ( chatModes.includes(chatModeLables.graph) || chatModes.includes(chatModeLables.fulltext) || - chatModes.includes(chatModeLables.graph_vector_fulltext) + chatModes.includes(chatModeLables['global search+vector+fulltext']) ) { setchatModes((prev) => prev.filter( - (m) => ![chatModeLables.graph, chatModeLables.fulltext, chatModeLables.graph_vector_fulltext].includes(m) + (m) => ![chatModeLables.graph, chatModeLables.fulltext, chatModeLables['graph+vector+fulltext']].includes(m) ) ); } - if (!chatModes.includes(chatModeLables.vector)) { - setchatModes([chatModeLables.vector]); + if (!(chatModes.includes(chatModeLables.vector) || chatModes.includes(chatModeLables['graph+vector']))) { + setchatModes([chatModeLables['graph+vector']]); } } }, [selectedRows.length, chatModes.length]); @@ -47,16 +47,16 @@ export default function ChatModeToggle({ const memoizedChatModes = useMemo(() => { return isGdsActive && isCommunityAllowed ? AvailableModes - : AvailableModes?.filter((m) => !m.mode.includes(chatModeLables.global_vector)); + : AvailableModes?.filter((m) => !m.mode.includes(chatModeLables['global search+vector+fulltext'])); }, [isGdsActive, isCommunityAllowed]); const menuItems = useMemo(() => { return memoizedChatModes?.map((m) => { const isDisabled = Boolean( - selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables.graph_vector) + selectedRows.length && !(m.mode === chatModeLables.vector || m.mode === chatModeLables['graph+vector']) ); const handleModeChange = () => { if (isDisabled) { - setchatModes([chatModeLables.graph_vector]); + setchatModes([chatModeLables['graph+vector']]); } else if (chatModes.includes(m.mode)) { setchatModes((prev) => prev.filter((i) => i != m.mode)); } else { @@ -68,7 +68,9 @@ export default function ChatModeToggle({ title: (
- {m.mode.includes('+') ? capitalizeWithPlus(m.mode) : capitalize(m.mode)} + {chatModeReadableLables[m.mode].includes('+') + ? capitalizeWithPlus(chatModeReadableLables[m.mode]) + : capitalize(chatModeReadableLables[m.mode])}
{m.description} @@ -97,7 +99,7 @@ export default function ChatModeToggle({ useEffect(() => { if (!selectedRows.length && !chatModes.length) { - setchatModes([chatModeLables.graph_vector_fulltext]); + setchatModes([]); } }, [selectedRows.length, chatModes.length]); return ( diff --git a/frontend/src/components/ChatBot/ChatModesSwitch.tsx b/frontend/src/components/ChatBot/ChatModesSwitch.tsx index a958b1a06..db91e3372 100644 --- a/frontend/src/components/ChatBot/ChatModesSwitch.tsx +++ b/frontend/src/components/ChatBot/ChatModesSwitch.tsx @@ -2,6 +2,7 @@ import { Flex, IconButton } from '@neo4j-ndl/react'; import { ChevronLeftIconSolid, ChevronRightIconSolid } from '@neo4j-ndl/react/icons'; import TipWrapper from '../UI/TipWrapper'; import { capitalize, capitalizeWithPlus } from '../../utils/Utils'; +import { chatModeReadableLables } from '../../utils/Constants'; export default function ChatModesSwitch({ switchToOtherMode, @@ -16,7 +17,7 @@ export default function ChatModesSwitch({ currentMode: string; isFullScreen: boolean; }) { - const chatmodetoshow = currentMode.includes('+') ? capitalizeWithPlus(currentMode) : capitalize(currentMode); + const chatmodetoshow =chatModeReadableLables[currentMode].includes('+') ? capitalizeWithPlus(chatModeReadableLables[currentMode]) : capitalize(chatModeReadableLables[currentMode]); return ( = (props) => { const [responseTime, setResponseTime] = useState(0); const [tokensUsed, setTokensUsed] = useState(0); const [cypherQuery, setcypherQuery] = useState(''); - const [chatsMode, setChatsMode] = useState(chatModeLables.graph_vector_fulltext); + const [chatsMode, setChatsMode] = useState(chatModeLables['graph+vector+fulltext']); const [graphEntitites, setgraphEntitites] = useState<[]>([]); const [messageError, setmessageError] = useState(''); const [entitiesModal, setEntitiesModal] = useState([]); @@ -583,7 +583,7 @@ const Chatbot: FC = (props) => { }} onClose={() => setShowInfoModal(false)} open={showInfoModal} - size={activeChat?.currentMode === chatModeLables.entity_vector ? 'large' : 'medium'} + size={activeChat?.currentMode === chatModeLables['entity search+vector'] ? 'large' : 'medium'} >
= ({ loading, chunks, mode }) => {
- {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && chunk.score && ( Similarity Score: {chunk?.score} @@ -71,11 +71,10 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
@@ -92,8 +91,8 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
- {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score} @@ -115,18 +114,17 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.fileName} - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score}
handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
@@ -138,18 +136,17 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.fileName} - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score}
handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
@@ -161,18 +158,17 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.fileName} - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score}
handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
@@ -188,18 +184,17 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { {chunk?.url} - {mode !== chatModeLables.global_vector && - mode !== chatModeLables.entity_vector && + {mode !== chatModeLables['global search+vector+fulltext'] && + mode !== chatModeLables['entity search+vector'] && mode !== chatModeLables.graph && ( <> Similarity Score: {chunk?.score}
handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
@@ -227,11 +222,10 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => {
handleChunkClick(chunk.element_id, 'Chunk')} - >{'Graph'} + >{'View Graph'}
diff --git a/frontend/src/components/ChatBot/CommunitiesInfo.tsx b/frontend/src/components/ChatBot/CommunitiesInfo.tsx index 1a769a1f3..bc8e5e8d3 100644 --- a/frontend/src/components/ChatBot/CommunitiesInfo.tsx +++ b/frontend/src/components/ChatBot/CommunitiesInfo.tsx @@ -47,7 +47,7 @@ const CommunitiesInfo: FC = ({ loading, communities, mode }) = onClick={() => handleCommunityClick(community.element_id, 'chatInfoView')} >{`ID : ${community.id}`} - {mode === chatModeLables.global_vector && community.score && ( + {mode === chatModeLables['global search+vector+fulltext'] && community.score && ( Score : {community.score} diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index 35d76aefb..a934913d9 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -16,10 +16,9 @@ const filterUniqueChunks = (chunks: Chunk[]) => { const sourceCheck = `${chunk.fileName}-${chunk.fileSource}`; if (chunkSource.has(sourceCheck)) { return false; - } - chunkSource.add(sourceCheck); - return true; - + } + chunkSource.add(sourceCheck); + return true; }); }; diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx index 70ac18cab..83c2c20b4 100644 --- a/frontend/src/components/Content.tsx +++ b/frontend/src/components/Content.tsx @@ -331,6 +331,7 @@ const Content: React.FC = ({ const apiRes = apiResponse?.data; return { ...curfile, + processingProgress: apiRes?.processingTime?.toFixed(2), processingTotalTime: apiRes?.processingTime?.toFixed(2), status: apiRes?.status, nodesCount: apiRes?.nodeCount, @@ -521,9 +522,8 @@ const Content: React.FC = ({ const handleOpenGraphClick = () => { const bloomUrl = process.env.VITE_BLOOM_URL; const uriCoded = userCredentials?.uri.replace(/:\d+$/, ''); - const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${ - userCredentials?.port ?? '7687' - }`; + const connectURL = `${uriCoded?.split('//')[0]}//${userCredentials?.userName}@${uriCoded?.split('//')[1]}:${userCredentials?.port ?? '7687' + }`; const encodedURL = encodeURIComponent(connectURL); const replacedUrl = bloomUrl?.replace('{CONNECT_URL}', encodedURL); window.open(replacedUrl, '_blank'); @@ -533,10 +533,10 @@ const Content: React.FC = ({ isLeftExpanded && isRightExpanded ? 'contentWithExpansion' : isRightExpanded - ? 'contentWithChatBot' - : !isLeftExpanded && !isRightExpanded - ? 'w-[calc(100%-128px)]' - : 'contentWithDropzoneExpansion'; + ? 'contentWithChatBot' + : !isLeftExpanded && !isRightExpanded + ? 'w-[calc(100%-128px)]' + : 'contentWithDropzoneExpansion'; const handleGraphView = () => { setOpenGraphView(true); @@ -552,7 +552,7 @@ const Content: React.FC = ({ setSelectedNodes([]); setSelectedRels([]); setClearHistoryData(true); - setchatModes([chatModeLables.graph_vector_fulltext]); + setchatModes([chatModeLables['graph+vector+fulltext']]); }; const retryHandler = async (filename: string, retryoption: string) => { @@ -568,12 +568,12 @@ const Content: React.FC = ({ return prev.map((f) => { return f.name === filename ? { - ...f, - status: 'Reprocess', - processingProgress: isStartFromBegining ? 0 : f.processingProgress, - nodesCount: isStartFromBegining ? 0 : f.nodesCount, - relationshipCount: isStartFromBegining ? 0 : f.relationshipsCount, - } + ...f, + status: 'Reprocess', + processingProgress: isStartFromBegining ? 0 : f.processingProgress, + nodesCount: isStartFromBegining ? 
0 : f.nodesCount,
+                relationshipCount: isStartFromBegining ? 0 : f.relationshipsCount,
+              }
            : f;
        });
      });
@@ -862,9 +862,8 @@ const Content: React.FC = ({
           handleGenerateGraph={processWaitingFilesOnRefresh}
         >
diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx
index 087e0d71c..8c434309f 100644
--- a/frontend/src/components/Dropdown.tsx
+++ b/frontend/src/components/Dropdown.tsx
@@ -2,7 +2,7 @@ import { Dropdown, Tip } from '@neo4j-ndl/react';
 import { OptionType, ReusableDropdownProps } from '../types';
 import { memo, useMemo, useReducer } from 'react';
 import { capitalize, capitalizeWithUnderscore } from '../utils/Utils';
-
+// import { LLMDropdownLabel } from '../utils/Constants';
 const DropdownComponent: React.FC = ({
   options,
   placeholder,
@@ -14,6 +14,8 @@ const DropdownComponent: React.FC = ({
   value,
 }) => {
   const [disableTooltip, toggleDisableState] = useReducer((state) => !state, false);
+  const isProdEnv = process.env.VITE_ENV === 'PROD';
+  const supportedModels = process.env.VITE_LLM_MODELS_PROD;
   const handleChange = (selectedOption: OptionType | null | void) => {
     onSelect(selectedOption);
   };
@@ -33,9 +35,11 @@ const DropdownComponent: React.FC = ({
           const label = typeof option === 'string' ? capitalizeWithUnderscore(option) : capitalize(option.label);
           const value = typeof option === 'string' ? option : option.value;
+          const isModelSupported = !isProdEnv || supportedModels?.includes(value);
           return {
             label,
             value,
+            isDisabled: !isModelSupported,
           };
         }),
     placeholder: placeholder || 'Select an option',
@@ -60,6 +64,13 @@ const DropdownComponent: React.FC = ({
       {children}
+      {/* {isProdEnv && (
+          {LLMDropdownLabel.disabledModels}
+
+          {LLMDropdownLabel.devEnv}
+
+          {'.'}
+      )} */}
   );
 };
diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx
index 7b0b271f0..b3fbafd55 100644
--- a/frontend/src/components/FileTable.tsx
+++ b/frontend/src/components/FileTable.tsx
@@ -824,7 +824,7 @@ const FileTable = forwardRef((props, ref) => {
               ...curfile,
               status: status,
               nodesCount: nodeCount,
-              relationshipsCount: relationshipCount,
+              relationshipCount: relationshipCount,
               model: model,
               processingTotalTime: processingTime?.toFixed(2),
               processingProgress: Math.floor((processed_chunk / total_chunks) * 100),
@@ -854,7 +854,7 @@ const FileTable = forwardRef((props, ref) => {
               ...curfile,
               status: status,
               nodesCount: nodeCount,
-              relationshipsCount: relationshipCount,
+              relationshipCount: relationshipCount,
               processingProgress: Math.floor((processed_chunk / total_chunks) * 100),
             };
           }
diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
index 420fbe590..1e6b0c213 100644
--- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
+++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
@@ -45,7 +45,7 @@ export default function DeduplicationTab() {
   const [neoRels, setNeoRels] = useState([]);
   const [openGraphView, setOpenGraphView] = useState(false);
   const [viewPoint, setViewPoint] = useState('');
-
+  const [nodesCount, setNodesCount] = useState(0);
   const fetchDuplicateNodes = useCallback(async () => {
     try {
       setLoading(true);
@@ -56,6 +56,8 @@ export default function DeduplicationTab() {
       }
       if (duplicateNodesData.data.data.length) {
         setDuplicateNodes(duplicateNodesData.data.data);
+        //@ts-ignore
+        setNodesCount(duplicateNodesData.data.message.total)
       } else {
         setDuplicateNodes([]);
       }
@@ -272,9 +274,9 @@ export default function DeduplicationTab() {
           accuracy and clarity of your knowledge graph.
-        {duplicateNodes.length > 0 && (
+        {nodesCount > 0 && (
-            Total Duplicate Nodes: {duplicateNodes.length}
+            Total Duplicate Nodes: {nodesCount}
         )}
diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx
index 1cb3c7310..348c97af2 100644
--- a/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx
+++ b/frontend/src/components/Popups/GraphEnhancementDialog/PostProcessingCheckList/SelectedJobList.tsx
@@ -11,18 +11,19 @@ export default function SelectedJobList({
 }) {
   const ongoingPostProcessingTasks = useMemo(
     () =>
-      (isGdsActive
-        ? postProcessingTasks.includes('enable_communities')
-          ? postProcessingTasks
-          : postProcessingTasks.filter((s) => s != 'enable_communities')
-        : postProcessingTasks.filter((s) => s != 'enable_communities')),
+      (isGdsActive
+        ? postProcessingTasks.includes('enable_communities')
+          ? postProcessingTasks
+          : postProcessingTasks.filter((s) => s != 'enable_communities')
+        : postProcessingTasks.filter((s) => s != 'enable_communities')),
     [isGdsActive, postProcessingTasks]
   );
   return (
-      {ongoingPostProcessingTasks.map((task) => {
+      {ongoingPostProcessingTasks.map((task, idx) => {
         return (
             {task
diff --git a/frontend/src/context/UsersFiles.tsx b/frontend/src/context/UsersFiles.tsx
index 14f5a0245..935f75f10 100644
--- a/frontend/src/context/UsersFiles.tsx
+++ b/frontend/src/context/UsersFiles.tsx
@@ -29,7 +29,7 @@ const FileContextProvider: FC = ({ children }) => {
   const [selectedSchemas, setSelectedSchemas] = useState([]);
   const [rowSelection, setRowSelection] = useState>({});
   const [selectedRows, setSelectedRows] = useState([]);
-  const [chatModes, setchatModes] = useState([chatModeLables.graph_vector_fulltext]);
+  const [chatModes, setchatModes] = useState([chatModeLables['graph+vector+fulltext']]);
   const [isSchema, setIsSchema] = useState(false);
   const [showTextFromSchemaDialog, setShowTextFromSchemaDialog] = useState({
     triggeredFrom: '',
diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts
index 8ab428999..112f8bbec 100644
--- a/frontend/src/utils/Constants.ts
+++ b/frontend/src/utils/Constants.ts
@@ -47,6 +47,17 @@ export const supportedLLmsForRagas = [
   'bedrock_claude_3_5_sonnet',
 ];
 export const chatModeLables = {
+  vector: 'vector',
+  graph: 'graph',
+  'graph+vector': 'graph_vector',
+  fulltext: 'fulltext',
+  'graph+vector+fulltext': 'graph_vector_fulltext',
+  'entity search+vector': 'entity_vector',
+  unavailableChatMode: 'Chat mode is unavailable when files are selected',
+  selected: 'Selected',
+  'global search+vector+fulltext': 'global_vector',
+};
+export const chatModeReadableLables: Record = {
   vector: 'vector',
   graph: 'graph',
   graph_vector: 'graph+vector',
@@ -73,7 +84,7 @@ export const chatModes =
   {
     description: 'Translates text to Cypher queries for precise data retrieval from a graph database.',
   },
   {
-    mode: chatModeLables.graph_vector,
+    mode: chatModeLables['graph+vector'],
     description: 'Combines vector indexing and graph connections for contextually enhanced semantic search.',
   },
   {
@@ -81,15 +92,15 @@ export const chatModes =
     description: 'Conducts fast, keyword-based search using full-text indexing on text chunks.',
   },
   {
-    mode: chatModeLables.graph_vector_fulltext,
+    mode: chatModeLables['graph+vector+fulltext'],
     description: 'Integrates vector, graph, and full-text indexing for comprehensive search results.',
   },
   {
-    mode: chatModeLables.entity_vector,
+    mode: chatModeLables['entity search+vector'],
     description: 'Uses vector indexing on entity nodes for highly relevant entity-based search.',
   },
   {
-    mode: chatModeLables.global_vector,
+    mode: chatModeLables['global search+vector+fulltext'],
     description: 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.',
   },
@@ -295,3 +306,8 @@ export const appLabels = {
   ownSchema: 'Or Define your own Schema',
   predefinedSchema: 'Select a Pre-defined Schema',
 };
+
+export const LLMDropdownLabel ={
+  disabledModels: 'Disabled models are available in the development version. Access more models in our ',
+  devEnv: 'development environment'
+}
diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts
index 226fd03f9..6945c17ca 100644
--- a/frontend/src/utils/Utils.ts
+++ b/frontend/src/utils/Utils.ts
@@ -417,15 +417,15 @@ export const getDescriptionForChatMode = (mode: string): string => {
       return 'Utilizes vector indexing on text chunks to enable semantic similarity search.';
     case chatModeLables.graph:
       return 'Leverages text-to-cypher translation to query a database and retrieve relevant data, ensuring a highly targeted and contextually accurate response.';
-    case chatModeLables.graph_vector:
+    case chatModeLables['graph+vector']:
      return 'Combines vector indexing on text chunks with graph connections, enhancing search results with contextual relevance by considering relationships between concepts.';
     case chatModeLables.fulltext:
       return 'Employs a fulltext index on text chunks for rapid keyword-based search, efficiently identifying documents containing specific words or phrases.';
-    case chatModeLables.graph_vector_fulltext:
+    case chatModeLables['graph+vector+fulltext']:
       return 'Merges vector indexing, graph connections, and fulltext indexing for a comprehensive search approach, combining semantic similarity, contextual relevance, and keyword-based search for optimal results.';
-    case chatModeLables.entity_vector:
+    case chatModeLables['entity search+vector']:
       return 'Combines entity node vector indexing with graph connections for accurate entity-based search, providing the most relevant response.';
-    case chatModeLables.global_vector:
+    case chatModeLables['global search+vector+fulltext']:
       return 'Use vector and full-text indexing on community nodes to provide accurate, context-aware answers globally.';
     default:
       return 'Chat mode description not available'; // Fallback description
@@ -510,7 +510,7 @@ export function downloadClickHandler(
 }
 export function getNodes(nodesData: Array, mode: string) {
   return nodesData.map((n) => {
-    if (!n.labels.length && mode === chatModeLables.entity_vector) {
+    if (!n.labels.length && mode === chatModeLables['entity search+vector']) {
       return {
         ...n,
         labels: ['Entity'],
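
A note on the Dropdown.tsx hunks above: in production builds the dropdown no longer removes unsupported models, it renders them disabled. The sketch below restates that gating logic in isolation; the helper name `toDropdownOption` is hypothetical, and it assumes `VITE_ENV` and `VITE_LLM_MODELS_PROD` (a comma-separated allow-list of model names) are exposed on `process.env` at build time, which the patch itself does not show.

```typescript
// Minimal sketch of the environment gating added in Dropdown.tsx (not the component itself).
// Assumption: VITE_LLM_MODELS_PROD holds a comma-separated allow-list, e.g. "openai_gpt_4o,gemini_1.5_flash";
// its exact contents are not defined by this patch.
const isProdEnv = process.env.VITE_ENV === 'PROD';
const supportedModels = process.env.VITE_LLM_MODELS_PROD;

export function toDropdownOption(value: string, label: string) {
  // Outside PROD every model stays selectable; in PROD only allow-listed models do.
  const isModelSupported = !isProdEnv || supportedModels?.includes(value);
  return { label, value, isDisabled: !isModelSupported };
}

// Example: with VITE_ENV=PROD and a hypothetical model 'diffbot' missing from the
// allow-list, toDropdownOption('diffbot', 'Diffbot') yields { ..., isDisabled: true },
// so the entry is greyed out instead of being filtered from the list.
```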
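The Constants.ts and Utils.ts hunks re-key `chatModeLables` by the human-readable mode name (with the internal string as the value), while the previous mapping survives as `chatModeReadableLables` for display. A small usage sketch under that reading, assuming string-to-string maps (the `Record` type parameters were stripped from this copy of the patch, and only the first three `chatModeReadableLables` entries are visible in the hunk context):

```typescript
// Illustrative only: round-tripping between readable mode names and internal mode strings.
// chatModeLables entries are copied from the hunk above; chatModeReadableLables shows just
// the entries visible in the diff context (the rest are assumed to follow the same pattern).
const chatModeLables: Record<string, string> = {
  vector: 'vector',
  graph: 'graph',
  'graph+vector': 'graph_vector',
  fulltext: 'fulltext',
  'graph+vector+fulltext': 'graph_vector_fulltext',
  'entity search+vector': 'entity_vector',
  'global search+vector+fulltext': 'global_vector',
};

const chatModeReadableLables: Record<string, string> = {
  vector: 'vector',
  graph: 'graph',
  graph_vector: 'graph+vector',
  // ...remaining internal-name -> display-name entries continue in Constants.ts
};

// Callers such as UsersFiles.tsx and Utils.ts now index by the readable key...
const mode = chatModeLables['graph+vector'];   // 'graph_vector'
// ...and translate back when a value has to be shown to the user.
const label = chatModeReadableLables[mode];    // 'graph+vector'
```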