diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index bce3e3260..c73de8b04 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -3,11 +3,11 @@ updates:
   - package-ecosystem: 'npm'
     directory: '/frontend'
     schedule:
-      interval: 'weekly'
+      interval: 'monthly'
     target-branch: 'dev'
   - package-ecosystem: 'pip'
     directory: '/backend'
     schedule:
-      interval: 'weekly'
-    target-branch: 'dev'
\ No newline at end of file
+      interval: 'monthly'
+    target-branch: 'dev'
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 9836a1cb7..6761a63c7 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -59,3 +59,5 @@ Secweb==1.11.0
 ragas==0.2.11
 rouge_score==0.1.2
 langchain-neo4j==0.3.0
+pypandoc-binary==1.15
+chardet==5.2.0
diff --git a/backend/score.py b/backend/score.py
index a2c42c982..e668788c3 100644
--- a/backend/score.py
+++ b/backend/score.py
@@ -41,6 +41,27 @@
 CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks")
 MERGED_DIR = os.path.join(os.path.dirname(__file__), "merged_files")
 
+def sanitize_filename(filename):
+    """
+    Sanitize the user-provided filename to prevent directory traversal and remove unsafe characters.
+    """
+    # Remove path separators and collapse redundant separators
+    filename = os.path.basename(filename)
+    filename = os.path.normpath(filename)
+    return filename
+
+def validate_file_path(directory, filename):
+    """
+    Construct the full file path and ensure it is within the specified directory.
+    """
+    file_path = os.path.join(directory, filename)
+    abs_directory = os.path.abspath(directory)
+    abs_file_path = os.path.abspath(file_path)
+    # Ensure the file path starts with the intended directory path
+    if not abs_file_path.startswith(abs_directory):
+        raise ValueError("Invalid file path")
+    return abs_file_path
+
 def healthy_condition():
     output = {"healthy": True}
     return output
@@ -217,8 +238,9 @@ async def extract_knowledge_graph_from_file(
         start_time = time.time()
         graph = create_graph_database_connection(uri, userName, password, database)
         graphDb_data_Access = graphDBdataAccess(graph)
-        merged_file_path = os.path.join(MERGED_DIR,file_name)
         if source_type == 'local file':
+            file_name = sanitize_filename(file_name)
+            merged_file_path = validate_file_path(MERGED_DIR, file_name)
             uri_latency, result = await extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions)
 
         elif source_type == 's3 bucket' and source_url:
@@ -278,8 +300,11 @@ async def extract_knowledge_graph_from_file(
         return create_api_response('Success', data=result, file_source= source_type)
     except LLMGraphBuilderException as e:
         error_message = str(e)
+        graph = create_graph_database_connection(uri, userName, password, database)
+        graphDb_data_Access = graphDBdataAccess(graph)
         graphDb_data_Access.update_exception_db(file_name,error_message, retry_condition)
-        failed_file_process(uri,file_name, merged_file_path, source_type)
+        if source_type == 'local file':
+            failed_file_process(uri,file_name, merged_file_path)
         node_detail = graphDb_data_Access.get_current_status_document_node(file_name)
         # Log the status as "Completed" because these errors are custom errors already handled by the application.
         json_obj = {'api_name':'extract','message':error_message,'file_created_at':formatted_time(node_detail[0]['created_time']),'error_message':error_message, 'file_name': file_name,'status':'Completed',
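The two helpers above carry the path-traversal fix: sanitize_filename strips directory components from the user-supplied name, and validate_file_path refuses anything that resolves outside MERGED_DIR. A minimal standalone sketch of the behaviour; the MERGED_DIR value is a stand-in, and the os.sep-suffixed comparison is a defensive variant of the patch's plain startswith check (which a sibling directory such as merged_files_evil would satisfy):

```python
import os

def sanitize_filename(filename):
    # Strip directory components: "../../etc/passwd" -> "passwd"
    return os.path.normpath(os.path.basename(filename))

def validate_file_path(directory, filename):
    abs_directory = os.path.abspath(directory)
    abs_file_path = os.path.abspath(os.path.join(directory, filename))
    # Comparing against the directory plus separator avoids prefix collisions,
    # e.g. "/app/merged_files_evil" passing a check against "/app/merged_files".
    if not abs_file_path.startswith(abs_directory + os.sep):
        raise ValueError("Invalid file path")
    return abs_file_path

MERGED_DIR = "/app/merged_files"  # stand-in for the real constant
print(validate_file_path(MERGED_DIR, sanitize_filename("../../etc/passwd")))
# -> /app/merged_files/passwd (traversal neutralized)
```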
@@ -290,8 +315,11 @@ async def extract_knowledge_graph_from_file(
     except Exception as e:
         message=f"Failed To Process File:{file_name} or LLM Unable To Parse Content "
         error_message = str(e)
+        graph = create_graph_database_connection(uri, userName, password, database)
+        graphDb_data_Access = graphDBdataAccess(graph)
         graphDb_data_Access.update_exception_db(file_name,error_message, retry_condition)
-        failed_file_process(uri,file_name, merged_file_path, source_type)
+        if source_type == 'local file':
+            failed_file_process(uri,file_name, merged_file_path)
         node_detail = graphDb_data_Access.get_current_status_document_node(file_name)
         json_obj = {'api_name':'extract','message':message,'file_created_at':formatted_time(node_detail[0]['created_time']),'error_message':error_message, 'file_name': file_name,'status':'Failed',
diff --git a/backend/src/QA_integration.py b/backend/src/QA_integration.py
index 4c68030f8..42a800851 100644
--- a/backend/src/QA_integration.py
+++ b/backend/src/QA_integration.py
@@ -177,7 +177,7 @@ def get_rag_chain(llm, system_template=CHAT_SYSTEM_TEMPLATE):
         logging.error(f"Error creating RAG chain: {e}")
         raise
 
-def format_documents(documents, model):
+def format_documents(documents, model, chat_mode_settings):
     prompt_token_cutoff = 4
     for model_names, value in CHAT_TOKEN_CUT_OFF.items():
         if model in model_names:
@@ -197,9 +197,20 @@ def format_documents(documents, model):
         try:
             source = doc.metadata.get('source', "unknown")
             sources.add(source)
-
-            entities = doc.metadata['entities'] if 'entities' in doc.metadata.keys() else entities
-            global_communities = doc.metadata["communitydetails"] if 'communitydetails' in doc.metadata.keys() else global_communities
+            if 'entities' in doc.metadata:
+                if chat_mode_settings["mode"] == CHAT_ENTITY_VECTOR_MODE:
+                    entity_ids = [entry['entityids'] for entry in doc.metadata['entities'] if 'entityids' in entry]
+                    entities.setdefault('entityids', set()).update(entity_ids)
+                else:
+                    if 'entityids' in doc.metadata['entities']:
+                        entities.setdefault('entityids', set()).update(doc.metadata['entities']['entityids'])
+                    if 'relationshipids' in doc.metadata['entities']:
+                        entities.setdefault('relationshipids', set()).update(doc.metadata['entities']['relationshipids'])
+
+            if 'communitydetails' in doc.metadata:
+                existing_ids = {entry['id'] for entry in global_communities}
+                new_entries = [entry for entry in doc.metadata["communitydetails"] if entry['id'] not in existing_ids]
+                global_communities.extend(new_entries)
 
             formatted_doc = (
                 "Document start\n"
@@ -218,7 +229,7 @@ def process_documents(docs, question, messages, llm, model, chat_mode_settings):
     start_time = time.time()
 
     try:
-        formatted_docs, sources, entitydetails, communities = format_documents(docs, model)
+        formatted_docs, sources, entitydetails, communities = format_documents(docs, model, chat_mode_settings)
 
         rag_chain = get_rag_chain(llm=llm)
diff --git a/backend/src/chunkid_entities.py b/backend/src/chunkid_entities.py
index 7d23e23dd..b9bccbf84 100644
--- a/backend/src/chunkid_entities.py
+++ b/backend/src/chunkid_entities.py
@@ -190,7 +190,7 @@ def get_entities_from_chunkids(uri, username, password, database ,nodedetails,en
 
         elif mode == CHAT_ENTITY_VECTOR_MODE:
             if "entitydetails" in nodedetails and nodedetails["entitydetails"]:
-                entity_ids = [item["id"] for item in nodedetails["entitydetails"]]
+                entity_ids = [item for item in nodedetails["entitydetails"]["entityids"]]
                 logging.info(f"chunkid_entities module: Starting for entity ids: {entity_ids}")
                 result = process_entityids(driver, entity_ids)
                 if "chunk_data" in result.keys():
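The rewritten metadata handling in format_documents accumulates entity and relationship element ids in sets and deduplicates community entries by id, instead of overwriting both with the last document's metadata. A self-contained sketch of that accumulation pattern (the sample metadata dicts are illustrative, not actual retriever output):

```python
# Illustrative metadata from two retrieved documents (shape assumed).
docs_metadata = [
    {"entities": {"entityids": ["e1", "e2"], "relationshipids": ["r1"]},
     "communitydetails": [{"id": "c1", "summary": "..."}]},
    {"entities": {"entityids": ["e2", "e3"]},
     "communitydetails": [{"id": "c1", "summary": "..."}, {"id": "c2", "summary": "..."}]},
]

entities, global_communities = {}, []
for md in docs_metadata:
    ent = md.get("entities", {})
    if "entityids" in ent:
        entities.setdefault("entityids", set()).update(ent["entityids"])
    if "relationshipids" in ent:
        entities.setdefault("relationshipids", set()).update(ent["relationshipids"])
    # Keep each community entry once, keyed by its id.
    existing = {c["id"] for c in global_communities}
    global_communities.extend(c for c in md.get("communitydetails", []) if c["id"] not in existing)

print(entities)                 # {'entityids': {'e1', 'e2', 'e3'}, 'relationshipids': {'r1'}}
print(len(global_communities))  # 2 -- 'c1' is kept only once
```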
diff --git a/backend/src/document_sources/gcs_bucket.py b/backend/src/document_sources/gcs_bucket.py
index aec6df330..d47d000b2 100644
--- a/backend/src/document_sources/gcs_bucket.py
+++ b/backend/src/document_sources/gcs_bucket.py
@@ -1,8 +1,7 @@
 import os
 import logging
 from google.cloud import storage
-from langchain_community.document_loaders import GCSFileLoader, GCSDirectoryLoader
-from langchain_community.document_loaders import PyMuPDFLoader
+from langchain_community.document_loaders import GCSFileLoader
 from langchain_core.documents import Document
 from PyPDF2 import PdfReader
 import io
@@ -42,8 +41,9 @@ def get_gcs_bucket_files_info(gcs_project_id, gcs_bucket_name, gcs_bucket_folder
     logging.exception(f'Exception Stack trace: {error_message}')
     raise LLMGraphBuilderException(error_message)
 
-def load_pdf(file_path):
-  return PyMuPDFLoader(file_path)
+def gcs_loader_func(file_path):
+  loader, _ = load_document_content(file_path)
+  return loader
 
 def get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token=None):
   nltk.download('punkt')
@@ -64,7 +64,7 @@ def get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, g
     blob = bucket.blob(blob_name)
     if blob.exists():
-      loader = GCSFileLoader(project_name=gcs_project_id, bucket=gcs_bucket_name, blob=blob_name, loader_func=load_document_content)
+      loader = GCSFileLoader(project_name=gcs_project_id, bucket=gcs_bucket_name, blob=blob_name, loader_func=gcs_loader_func)
       pages = loader.load()
     else :
       raise LLMGraphBuilderException('File does not exist, Please re-upload the file and try again.')
diff --git a/backend/src/document_sources/local_file.py b/backend/src/document_sources/local_file.py
index 47e12ab48..d6b8715e9 100644
--- a/backend/src/document_sources/local_file.py
+++ b/backend/src/document_sources/local_file.py
@@ -3,30 +3,61 @@
 from langchain_community.document_loaders import PyMuPDFLoader
 from langchain_community.document_loaders import UnstructuredFileLoader
 from langchain_core.documents import Document
+import chardet
+from langchain_core.document_loaders import BaseLoader
 
+class ListLoader(BaseLoader):
+    """A wrapper to make a list of Documents compatible with BaseLoader."""
+    def __init__(self, documents):
+        self.documents = documents
+    def load(self):
+        return self.documents
+
+def detect_encoding(file_path):
+    """Detects the file encoding to avoid UnicodeDecodeError."""
+    with open(file_path, 'rb') as f:
+        raw_data = f.read(4096)
+        result = chardet.detect(raw_data)
+    return result['encoding'] or "utf-8"
+
 def load_document_content(file_path):
-    if Path(file_path).suffix.lower() == '.pdf':
-        return PyMuPDFLoader(file_path)
+    file_extension = Path(file_path).suffix.lower()
+    encoding_flag = False
+    if file_extension == '.pdf':
+        loader = PyMuPDFLoader(file_path)
+        return loader, encoding_flag
+    elif file_extension == ".txt":
+        encoding = detect_encoding(file_path)
+        logging.info(f"Detected encoding for {file_path}: {encoding}")
+        if encoding.lower() == "utf-8":
+            loader = UnstructuredFileLoader(file_path, mode="elements", autodetect_encoding=True)
+            return loader, encoding_flag
+        else:
+            with open(file_path, encoding=encoding, errors="replace") as f:
+                content = f.read()
+            loader = ListLoader([Document(page_content=content, metadata={"source": file_path})])
+            encoding_flag = True
+            return loader, encoding_flag
     else:
-        return UnstructuredFileLoader(file_path, mode="elements",autodetect_encoding=True)
+        loader = UnstructuredFileLoader(file_path, mode="elements", autodetect_encoding=True)
+        return loader, encoding_flag
 
 def get_documents_from_file_by_path(file_path, file_name):
     file_path = Path(file_path)
-    if file_path.exists():
-        logging.info(f'file {file_name} processing')
-        file_extension = file_path.suffix.lower()
-        try:
-            loader = load_document_content(file_path)
-            if file_extension == ".pdf":
-                pages = loader.load()
-            else:
-                unstructured_pages = loader.load()
-                pages = get_pages_with_page_numbers(unstructured_pages)
-        except Exception as e:
-            raise Exception('Error while reading the file content or metadata')
-    else:
+    if not file_path.exists():
         logging.info(f'File {file_name} does not exist')
         raise Exception(f'File {file_name} does not exist')
+    logging.info(f'file {file_name} processing')
+    try:
+        loader, encoding_flag = load_document_content(file_path)
+        file_extension = file_path.suffix.lower()
+        if file_extension == ".pdf" or (file_extension == ".txt" and encoding_flag):
+            pages = loader.load()
+        else:
+            unstructured_pages = loader.load()
+            pages = get_pages_with_page_numbers(unstructured_pages)
+    except Exception as e:
+        raise Exception(f'Error while reading the file content or metadata, {e}')
     return file_name, pages, file_extension
 
 def get_pages_with_page_numbers(unstructured_pages):
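detect_encoding samples the first 4 KB of the file with chardet (newly added to requirements.txt), and non-UTF-8 text files are read directly with errors="replace" and wrapped in ListLoader so downstream code still receives a BaseLoader. A quick standalone check of the detection step; the Latin-1 sample file is created inline for illustration, and the exact encoding chardet reports may vary:

```python
import chardet

def detect_encoding(file_path):
    with open(file_path, "rb") as f:
        raw_data = f.read(4096)
    result = chardet.detect(raw_data)
    return result["encoding"] or "utf-8"

# Write a Latin-1 file (repeated so the sample is large enough for
# reliable detection) and confirm it is not mis-read as UTF-8.
with open("sample.txt", "wb") as f:
    f.write(("café señor " * 50).encode("latin-1"))

encoding = detect_encoding("sample.txt")   # typically 'ISO-8859-1' or similar
with open("sample.txt", encoding=encoding, errors="replace") as f:
    print(f.read()[:11])                   # café señor
```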
metadata={"source": file_path})]) + encoding_flag = True + return loader,encoding_flag else: - return UnstructuredFileLoader(file_path, mode="elements",autodetect_encoding=True) + loader = UnstructuredFileLoader(file_path, mode="elements",autodetect_encoding=True) + return loader,encoding_flag def get_documents_from_file_by_path(file_path,file_name): file_path = Path(file_path) - if file_path.exists(): - logging.info(f'file {file_name} processing') - file_extension = file_path.suffix.lower() - try: - loader = load_document_content(file_path) - if file_extension == ".pdf": - pages = loader.load() - else: - unstructured_pages = loader.load() - pages= get_pages_with_page_numbers(unstructured_pages) - except Exception as e: - raise Exception('Error while reading the file content or metadata') - else: + if not file_path.exists(): logging.info(f'File {file_name} does not exist') raise Exception(f'File {file_name} does not exist') + logging.info(f'file {file_name} processing') + try: + loader, encoding_flag = load_document_content(file_path) + file_extension = file_path.suffix.lower() + if file_extension == ".pdf" or (file_extension == ".txt" and encoding_flag): + pages = loader.load() + else: + unstructured_pages = loader.load() + pages = get_pages_with_page_numbers(unstructured_pages) + except Exception as e: + raise Exception(f'Error while reading the file content or metadata, {e}') return file_name, pages , file_extension def get_pages_with_page_numbers(unstructured_pages): diff --git a/backend/src/document_sources/web_pages.py b/backend/src/document_sources/web_pages.py index cdc0fb76a..30fef7d06 100644 --- a/backend/src/document_sources/web_pages.py +++ b/backend/src/document_sources/web_pages.py @@ -5,12 +5,6 @@ def get_documents_from_web_page(source_url:str): try: pages = WebBaseLoader(source_url, verify_ssl=False).load() - try: - file_name = pages[0].metadata['title'].strip() - if not file_name: - file_name = last_url_segment(source_url) - except: - file_name = last_url_segment(source_url) - return file_name, pages + return pages except Exception as e: raise LLMGraphBuilderException(str(e)) diff --git a/backend/src/document_sources/wikipedia.py b/backend/src/document_sources/wikipedia.py index e4d7742b1..163f971b4 100644 --- a/backend/src/document_sources/wikipedia.py +++ b/backend/src/document_sources/wikipedia.py @@ -4,7 +4,7 @@ def get_documents_from_Wikipedia(wiki_query:str, language:str): try: - pages = WikipediaLoader(query=wiki_query.strip(), lang=language, load_all_available_meta=False).load() + pages = WikipediaLoader(query=wiki_query.strip(), lang=language, load_all_available_meta=False,doc_content_chars_max=100000,load_max_docs=1).load() file_name = wiki_query.strip() logging.info(f"Total Pages from Wikipedia = {len(pages)}") return file_name, pages diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index cb8a9879d..6f5365498 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -198,10 +198,7 @@ def check_account_access(self, database): def check_gds_version(self): try: gds_procedure_count = """ - SHOW PROCEDURES - YIELD name - WHERE name STARTS WITH "gds." 
diff --git a/backend/src/llm.py b/backend/src/llm.py
index 1cfb0a52b..f40f5492c 100644
--- a/backend/src/llm.py
+++ b/backend/src/llm.py
@@ -14,6 +14,7 @@
 import boto3
 import google.auth
 from src.shared.constants import ADDITIONAL_INSTRUCTIONS
+import re
 
 def get_llm(model: str):
     """Retrieve the specified language model based on the model name."""
@@ -166,7 +167,8 @@ def get_chunk_id_as_doc_metadata(chunkId_chunkDoc_list):
 async def get_graph_document_list(
     llm, combined_chunk_document_list, allowedNodes, allowedRelationship, additional_instructions=None
 ):
-    futures = []
+    if additional_instructions:
+        additional_instructions = sanitize_additional_instruction(additional_instructions)
     graph_document_list = []
     if "diffbot_api_key" in dir(llm):
         llm_transformer = llm
@@ -210,4 +212,26 @@ async def get_graph_from_llm(model, chunkId_chunkDoc_list, allowedNodes, allowed
     graph_document_list = await get_graph_document_list(
         llm, combined_chunk_document_list, allowedNodes, allowedRelationship, additional_instructions
     )
-    return graph_document_list
\ No newline at end of file
+    return graph_document_list
+
+def sanitize_additional_instruction(instruction: str) -> str:
+    """
+    Sanitizes additional instruction by:
+    - Replacing curly braces `{}` with `[]` to prevent variable interpretation.
+    - Removing potential injection patterns like `os.getenv()`, `eval()`, `exec()`.
+    - Normalizing whitespace.
+    Args:
+        instruction (str): Raw additional instruction input.
+    Returns:
+        str: Sanitized instruction safe for LLM processing.
+    """
+    logging.info("Sanitizing additional instructions")
+    # Convert `{}` to `[]` so braces are not treated as prompt-template variables
+    instruction = instruction.replace("{", "[").replace("}", "]")
+    # Block dangerous function calls
+    injection_patterns = [r"os\.getenv\(", r"eval\(", r"exec\(", r"subprocess\.", r"import os", r"import subprocess"]
+    for pattern in injection_patterns:
+        instruction = re.sub(pattern, "[BLOCKED]", instruction, flags=re.IGNORECASE)
+    # Normalize whitespace
+    instruction = re.sub(r'\s+', ' ', instruction).strip()
+    return instruction
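sanitize_additional_instruction defuses prompt-template braces and masks obvious injection patterns before user instructions reach the LLM. A small demonstration of its effect on a hostile input, assuming the function is importable as src.llm (the output is traced by hand; note the overlapping patterns leave the blocked call's trailing arguments behind):

```python
# Assumes the backend package layout makes src.llm importable (hypothetical usage).
from src.llm import sanitize_additional_instruction

hostile = "Summarize {topic}   and eval(os.getenv('OPENAI_API_KEY'))"
print(sanitize_additional_instruction(hostile))
# -> Summarize [topic] and [BLOCKED][BLOCKED]'OPENAI_API_KEY'))
```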
+ """ + logging.info("Sanitizing additional instructions") + instruction = instruction.replace("{", "[").replace("}", "]") # Convert `{}` to `[]` for safety + # Step 2: Block dangerous function calls + injection_patterns = [r"os\.getenv\(", r"eval\(", r"exec\(", r"subprocess\.", r"import os", r"import subprocess"] + for pattern in injection_patterns: + instruction = re.sub(pattern, "[BLOCKED]", instruction, flags=re.IGNORECASE) + # Step 4: Normalize spaces + instruction = re.sub(r'\s+', ' ', instruction).strip() + return instruction diff --git a/backend/src/main.py b/backend/src/main.py index 59bff1935..c21e26f5a 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -124,7 +124,12 @@ def create_source_node_graph_web_url(graph, model, source_url, source_type): raise LLMGraphBuilderException(message) try: title = pages[0].metadata['title'].strip() - if not title: + if title: + graphDb_data_Access = graphDBdataAccess(graph) + existing_url = graphDb_data_Access.get_websource_url(title) + if existing_url != source_url: + title = str(title) + "-" + str(last_url_segment(source_url)).strip() + else: title = last_url_segment(source_url) language = pages[0].metadata['language'] except: @@ -253,7 +258,7 @@ async def extract_graph_from_file_s3(uri, userName, password, database, model, s async def extract_graph_from_web_page(uri, userName, password, database, model, source_url, file_name, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, retry_condition, additional_instructions): if not retry_condition: - file_name, pages = get_documents_from_web_page(source_url) + pages = get_documents_from_web_page(source_url) if pages==None or len(pages)==0: raise LLMGraphBuilderException(f'Content is not available for given URL : {file_name}') return await processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, token_chunk_size, chunk_overlap, chunks_to_combine, additional_instructions=additional_instructions) @@ -742,14 +747,13 @@ def set_status_retry(graph, file_name, retry_condition): logging.info(obj_source_node) graphDb_data_Access.update_source_node(obj_source_node) -def failed_file_process(uri,file_name, merged_file_path, source_type): +def failed_file_process(uri,file_name, merged_file_path): gcs_file_cache = os.environ.get('GCS_FILE_CACHE') - if source_type == 'local file': - if gcs_file_cache == 'True': - folder_name = create_gcs_bucket_folder_name_hashed(uri,file_name) - copy_failed_file(BUCKET_UPLOAD, BUCKET_FAILED_FILE, folder_name, file_name) - time.sleep(5) - delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name) - else: - logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}') - delete_uploaded_local_file(merged_file_path,file_name) \ No newline at end of file + if gcs_file_cache == 'True': + folder_name = create_gcs_bucket_folder_name_hashed(uri,file_name) + copy_failed_file(BUCKET_UPLOAD, BUCKET_FAILED_FILE, folder_name, file_name) + time.sleep(5) + delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name) + else: + logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}') + delete_uploaded_local_file(merged_file_path,file_name) \ No newline at end of file diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py index b85654c8c..806f58541 100644 --- a/backend/src/shared/constants.py +++ b/backend/src/shared/constants.py @@ -521,7 +521,7 @@ LOCAL_COMMUNITY_SEARCH_QUERY = """ WITH collect(node) AS nodes, 
diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py
index b85654c8c..806f58541 100644
--- a/backend/src/shared/constants.py
+++ b/backend/src/shared/constants.py
@@ -521,7 +521,7 @@
 LOCAL_COMMUNITY_SEARCH_QUERY = """
 WITH collect(node) AS nodes,
      avg(score) AS score,
-     collect({{id: elementId(node), score: score}}) AS metadata
+     collect({{entityids: elementId(node), score: score}}) AS metadata
 WITH score, nodes, metadata,
diff --git a/frontend/.husky/common.sh b/frontend/.husky/common.sh
new file mode 100644
index 000000000..0f4ced820
--- /dev/null
+++ b/frontend/.husky/common.sh
@@ -0,0 +1,8 @@
+command_exists () {
+  command -v "$1" >/dev/null 2>&1
+}
+
+# Workaround for Windows 10, Git Bash and Yarn
+if command_exists winpty && test -t 1; then
+  exec < /dev/tty
+fi
\ No newline at end of file
diff --git a/frontend/.husky/pre-commit b/frontend/.husky/pre-commit
new file mode 100755
index 000000000..c8fdcdb67
--- /dev/null
+++ b/frontend/.husky/pre-commit
@@ -0,0 +1,2 @@
+cd frontend
+yarn run lint-staged
\ No newline at end of file
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index b77ca5672..bb8aae68b 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -16,6 +16,9 @@ ARG VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_fl
 ARG VITE_AUTH0_CLIENT_ID=""
 ARG VITE_AUTH0_DOMAIN=""
 ARG VITE_SKIP_AUTH="false"
+ARG VITE_CHUNK_OVERLAP=20
+ARG VITE_TOKENS_PER_CHUNK=200
+ARG VITE_CHUNK_TO_COMBINE=1
 
 WORKDIR /app
 COPY package.json yarn.lock ./
@@ -36,6 +39,9 @@
 RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \
     VITE_AUTH0_CLIENT_ID=$VITE_AUTH0_CLIENT_ID \
     VITE_AUTH0_DOMAIN=$VITE_AUTH0_DOMAIN \
     VITE_SKIP_AUTH=$VITE_SKIP_AUTH \
+    VITE_CHUNK_OVERLAP=$VITE_CHUNK_OVERLAP \
+    VITE_TOKENS_PER_CHUNK=$VITE_TOKENS_PER_CHUNK \
+    VITE_CHUNK_TO_COMBINE=$VITE_CHUNK_TO_COMBINE \
     yarn run build
 
 # Step 2: Serve the application using Nginx
diff --git a/frontend/package.json b/frontend/package.json
index f60327cd5..f0da5dd67 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -8,7 +8,9 @@
     "build": "tsc && vite build",
     "format": "prettier --write \"**/*.{ts,tsx}\"",
     "lint": "eslint --ext .ts --ext .tsx . --report-unused-disable-directives --max-warnings 0",
-    "preview": "vite preview"
+    "preview": "vite preview",
+    "lint-staged": "lint-staged --config .lintstagedrc.json",
+    "postinstall": "cd .. && husky frontend/.husky"
   },
   "dependencies": {
     "@auth0/auth0-react": "^2.2.4",
&& husky frontend/.husky" }, "dependencies": { "@auth0/auth0-react": "^2.2.4", @@ -23,13 +25,13 @@ "@react-oauth/google": "^0.12.1", "@tanstack/react-table": "^8.20.5", "@types/uuid": "^9.0.7", - "axios": "^1.6.5", + "axios": "^1.7.9", "clsx": "^2.1.1", - "eslint-plugin-react": "^7.33.2", - "re-resizable": "^6.9.16", + "eslint-plugin-react": "^7.37.4", + "re-resizable": "^6.11.2", "react": "^18.3.1", "react-dom": "^18.3.1", - "react-icons": "^5.2.1", + "react-icons": "^5.5.0", "react-markdown": "^9.0.1", "react-router": "^6.23.1", "react-router-dom": "^6.23.1", @@ -37,19 +39,21 @@ }, "devDependencies": { "@tailwindcss/postcss": "^4.0.7", - "@types/node": "^20.11.10", + "@types/node": "^22.13.9", "@types/react": "^18.2.15", "@types/react-dom": "^18.2.7", "@typescript-eslint/eslint-plugin": "^7.0.0", "@typescript-eslint/parser": "^6.0.0", "@vitejs/plugin-react": "^4.0.3", "eslint": "^8.45.0", - "eslint-config-prettier": "^8.5.0", + "eslint-config-prettier": "^10.0.2", "eslint-plugin-react-hooks": "^5.1.0", - "eslint-plugin-react-refresh": "^0.4.3", - "postcss": "^8.4.33", + "eslint-plugin-react-refresh": "^0.4.19", + "husky": "^9.1.7", + "lint-staged": "^15.4.3", + "postcss": "^8.5.3", "prettier": "^2.7.1", - "react-dropzone": "^14.3.5", + "react-dropzone": "^14.3.8", "tailwindcss": "^4.0.7", "typescript": "^5.7.3", "vite": "^4.5.3" diff --git a/frontend/src/App.css b/frontend/src/App.css index b60fb5acf..e7b1fdfe5 100644 --- a/frontend/src/App.css +++ b/frontend/src/App.css @@ -2,6 +2,7 @@ border: 1px solid #d1d5db; width: 95% !important; } + .s3Container { display: flex; align-items: center; @@ -89,6 +90,29 @@ box-shadow: -12px 0 #FFF, 12px 0 rgb(var(--theme-palette-primary-bg-strong)); } } +.dropdownbtn{ + background-color: #014063; + color: white !important; + border-radius: unset !important; + width: 35px; + padding: 0.5rem; +} +.dropdownbtn:hover{ + background-color: #02293d !important; +} +.graphbtn{ + border-top-right-radius: 0px !important; + border-bottom-right-radius: 0px !important; +} +.dropdownbtn.darktheme{ + background-color: #51A6B1; +} +.dropdownbtn.small{ + height: 24px; +} +.darktheme:hover{ + background-color: #44929c !important; +} .ndl-dropzone>div { padding-top: 10px !important; @@ -356,8 +380,40 @@ position: relative; height: 100%; } + +@media screen and (min-width:1025px) and (max-width:1440px){ + .layout-wrapper{ + grid-template-columns: 64px 1fr minmax(min-content,4fr) minmax(min-content,2fr) 64px; + + } +} +@media screen and (min-width:1536px) and (max-width:2560px){ + .layout-wrapper{ + grid-template-columns: 64px 1fr minmax(min-content,6.5fr) minmax(max-content,1fr) 64px; + + } +} .sidenav-container{ height: calc(100vh - 58px); min-height: 200px; display: flex +} + + .resource-sections.blur-sm { + filter: blur(2px); + } +.profile-container{ + display:flex; + gap:4px; + align-items:center; + border: 1px solid rgb(var(--theme-palette-neutral-border-strong)); + border-radius: 12px; +} +.websource-btn-container{ + display: flex; + gap: 10px; +} + +.enhancement-btn__wrapper{ + padding-right: 12px; } \ No newline at end of file diff --git a/frontend/src/components/Auth/Auth.tsx b/frontend/src/components/Auth/Auth.tsx index 30849d4da..44c67ab02 100644 --- a/frontend/src/components/Auth/Auth.tsx +++ b/frontend/src/components/Auth/Auth.tsx @@ -1,6 +1,8 @@ -import React from 'react'; -import { AppState, Auth0Provider, withAuthenticationRequired } from '@auth0/auth0-react'; + +import React, { useEffect } from 'react'; +import { AppState, Auth0Provider, useAuth0 } from 
diff --git a/frontend/src/components/Auth/Auth.tsx b/frontend/src/components/Auth/Auth.tsx
index 30849d4da..44c67ab02 100644
--- a/frontend/src/components/Auth/Auth.tsx
+++ b/frontend/src/components/Auth/Auth.tsx
@@ -1,6 +1,8 @@
-import React from 'react';
-import { AppState, Auth0Provider, withAuthenticationRequired } from '@auth0/auth0-react';
+
+import React, { useEffect } from 'react';
+import { AppState, Auth0Provider, useAuth0 } from '@auth0/auth0-react';
 import { useNavigate } from 'react-router';
+
 const domain = process.env.VITE_AUTH0_DOMAIN;
 const clientId = process.env.VITE_AUTH0_CLIENT_ID;
 const Auth0ProviderWithHistory: React.FC<{ children: React.ReactNode }> = ({ children }) => {
@@ -24,7 +26,18 @@ const Auth0ProviderWithHistory: React.FC<{ children: React.ReactNode }> = ({ chi
 };
 
 export const AuthenticationGuard: React.FC<{ component: React.ComponentType }> = ({ component }) => {
-  const Component = withAuthenticationRequired(component);
+
+  const { isAuthenticated, isLoading } = useAuth0();
+  const Component = component;
+  const navigate = useNavigate();
+  useEffect(() => {
+    if (!isLoading && !isAuthenticated) {
+      localStorage.setItem('isReadOnlyMode', 'true');
+      navigate('/readonly', { replace: true });
+    }
+  }, [isLoading, isAuthenticated]);
+
+  return <Component />;
 };
diff --git a/frontend/src/components/BreakDownPopOver.tsx b/frontend/src/components/BreakDownPopOver.tsx
index 46654a913..a2ab7000d 100644
--- a/frontend/src/components/BreakDownPopOver.tsx
+++ b/frontend/src/components/BreakDownPopOver.tsx
@@ -9,13 +9,13 @@ export default function BreakDownPopOver({ file, isNodeCount = true }: { file: C
[JSX hunk garbled in extraction; markup not recoverable. Recoverable fragments: both branches render count lists; node counts (Chunk Nodes: {file.chunkNodeCount}, Entity Nodes: {file.entityNodeCount}, plus Community Nodes: {file.communityNodeCount} when isGdsActive) and relationship counts (Chunk Relations: {file.chunkRelCount}, Entity Relations: {file.entityEntityRelCount}, plus Community Relations: {file.communityRelCount} when isGdsActive).]
diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx
index a6e7f6590..859bcf9b7 100644
--- a/frontend/src/components/ChatBot/ChatInfoModal.tsx
+++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx
@@ -315,13 +315,13 @@ const ChatInfoModal: React.FC = ({
[JSX hunks at @@ -315,13 +315,13 @@ and @@ -499,7 +499,7 @@ garbled in extraction; markup not recoverable. Recoverable fragments: the "Retrieval information" header ("To generate this response, the process took {response_time} seconds"), and the guard activeTab == 4 && nodes?.length && relationships?.length && mode !== chatModeLables.graph.]
[diff headers for additional ChatBot components lost in extraction; surviving fragments reference (props) => render functions, an isDeleteChatLoading spinner guard, and a listMessages.map((chat, index) => ...) loop reading Object.keys(chat.modes).]
[diff header for ChunkInfo lost in extraction; hunk context: const ChunkInfo: FC = ({ loading, chunks, mode }) => with const themeUtils = useContext(ThemeWrapperContext). Recoverable changes across the source-type branches (page number, YouTube url/start_time, wikipedia.org, storage.googleapis.com, s3://, other web urls, local file): the "Similarity Score: {chunk?.score}" text is restyled, and the former 'View Graph' TextLink with an htmlAttributes onClick is replaced by an icon button wired to handleChunkClick(chunk.element_id, 'Chunk').]
diff --git a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx
index 136eeef15..f0371d1d5 100644
--- a/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx
+++ b/frontend/src/components/ChatBot/ExpandedChatButtonContainer.tsx
@@ -22,7 +22,7 @@ const ExpandedChatButtonContainer: React.FC = ({ closeChatBot, delete
[JSX hunk garbled in extraction; markup not recoverable. Surviving context: menuAnchor={chatAnchor}, isRoot={false}.]
diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx
index ace69a07f..6016b511b 100644
--- a/frontend/src/components/ChatBot/SourcesInfo.tsx
+++ b/frontend/src/components/ChatBot/SourcesInfo.tsx
@@ -37,8 +37,8 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => {
[JSX hunks garbled in extraction; markup not recoverable. Recoverable fragments: the chunk list maps (c) => ({ fileName: c.fileName, fileSource: c.fileSource }) with a local-file icon branch; the sources.map((link, index) => ...) loop branches on isAllowedHost(link, ['wikipedia.org']) (Wikipedia Logo), isAllowedHost(link, ['storage.googleapis.com']) (Google Cloud Storage Logo), youtubeLinkValidation(link), plain http(s) links, and link?.startsWith('s3://') (S3 Logo).]
[diff header for the main Content component lost in extraction; hunk context: const Content: React.FC = ({ ... }) with a lazy import('./Popups/LargeFilePopUp/ConfirmationDialog'). Recoverable changes: the commented-out "// const [init, setInit] = useState(false);" line is removed; new state const [isGraphBtnMenuOpen, setIsGraphBtnMenuOpen] = useState(false) with const graphbtnRef = useRef(null) and const { colorMode } = useContext(ThemeWrapperContext) are added; the button row under "Neo4j connection {isReadOnlyUser ? '(Read only Mode)' : ''}" (buttonCaptions.generateGraph, buttonCaptions.showPreviewGraph, buttonCaptions.exploreGraphWithBloom, buttonCaptions.deleteFiles, each with selected-file counts) is reorganized into a split button whose chevron toggles a menu via setIsGraphBtnMenuOpen, offering the preview-graph action and an "Open Graph with Bloom" action enabled when some file has status 'Completed'.]
diff --git a/frontend/src/components/DataSources/GCS/GCSModal.tsx b/frontend/src/components/DataSources/GCS/GCSModal.tsx
index a0a406ceb..f0ee06996 100644
--- a/frontend/src/components/DataSources/GCS/GCSModal.tsx
+++ b/frontend/src/components/DataSources/GCS/GCSModal.tsx
@@ -8,7 +8,7 @@ import CustomModal from '../../../HOC/CustomModal';
 import { useGoogleLogin } from '@react-oauth/google';
 import { useAlertContext } from '../../../context/Alert';
 import { buttonCaptions } from '../../../utils/Constants';
-import { showErrorToast, showNormalToast } from '../../../utils/toasts';
+import { showErrorToast, showNormalToast } from '../../../utils/Toasts';
 
 const GCSModal: React.FC = ({ hideModal, open, openGCSModal }) => {
   const [bucketName, setBucketName] = useState('');
diff --git a/frontend/src/components/DataSources/Local/DropZone.tsx b/frontend/src/components/DataSources/Local/DropZone.tsx
index 0bce3a241..90cd2ca81 100644
--- a/frontend/src/components/DataSources/Local/DropZone.tsx
+++ b/frontend/src/components/DataSources/Local/DropZone.tsx
@@ -9,7 +9,7 @@ import { buttonCaptions, chunkSize } from '../../../utils/Constants';
 import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons';
 import { IconButtonWithToolTip } from '../../UI/IconButtonToolTip';
 import { uploadAPI } from '../../../utils/FileAPI';
-import { showErrorToast, showSuccessToast } from '../../../utils/toasts';
+import { showErrorToast, showSuccessToast } from '../../../utils/Toasts';
 
 const DropZone: FunctionComponent = () => {
   const { filesData, setFilesData, model } = useFileContext();
diff --git a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx
index 16dc55335..13fe74745 100644
--- a/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx
+++ b/frontend/src/components/DataSources/Local/DropZoneForSmallLayouts.tsx
@@ -1,4 +1,4 @@
-import { CloudArrowUpIconSolid } from '@neo4j-ndl/react/icons';
+import { DocumentPlusIconSolid } from '@neo4j-ndl/react/icons';
 import { useDropzone } from 'react-dropzone';
 import { useFileContext } from '../../../context/UsersFiles';
 import { useEffect, useState } from 'react';
@@ -8,7 +8,7 @@ import { chunkSize } from '../../../utils/Constants';
 import { uploadAPI } from '../../../utils/FileAPI';
 import { v4 as uuidv4 } from 'uuid';
 import { LoadingSpinner } from '@neo4j-ndl/react';
-import { showErrorToast, showSuccessToast } from '../../../utils/toasts';
+import { showErrorToast, showSuccessToast } from '../../../utils/Toasts';
 
 export default function DropZoneForSmallLayouts() {
   const { filesData, setFilesData, model } = useFileContext();
@@ -219,7 +219,7 @@ export default function DropZoneForSmallLayouts() {
[JSX hunk garbled in extraction; the {isLoading ? ... : ...} render swaps CloudArrowUpIconSolid for DocumentPlusIconSolid beside the LoadingSpinner, matching the import change above.]
diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx
index a629f46cf..8eee984f5 100644
--- a/frontend/src/components/Dropdown.tsx
+++ b/frontend/src/components/Dropdown.tsx
@@ -3,7 +3,6 @@ import { OptionType, ReusableDropdownProps } from '../types';
 import { memo, useMemo } from 'react';
 import { capitalize, capitalizeWithUnderscore } from '../utils/Utils';
 import { prodllms } from '../utils/Constants';
-import { InformationCircleIconOutline } from '@neo4j-ndl/react/icons';
 
 const DropdownComponent: React.FC = ({
   options,
@@ -30,13 +29,10 @@
[JSX hunk garbled in extraction; markup not recoverable. Surviving fragment maps options via ({ value: value.toString(), ... }).]
[diff header for AdditionalInstructionsText lost in extraction; hunk context: @@ -138,7 +138,7 @@ export default function AdditionalInstructionsText({; markup not recoverable.]
diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
index 2c75d9f67..6505366a1 100644
--- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
+++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
@@ -271,19 +271,15 @@ export default function DeduplicationTab() {
[JSX hunk garbled in extraction; markup not recoverable. Recoverable fragments: heading "Refine Your Knowledge Graph: Merge Duplicate Entities"; body copy 'Identify and merge similar entries like "Apple" and "Apple Inc." to eliminate redundancy and improve the accuracy and clarity of your knowledge graph.'; a conditional "Total Duplicate Nodes: {nodesCount}" badge shown when nodesCount > 0; and a merge button (onClick={() => setShowDeletePopUp(true)}, loading={loading}) that drops its size='large' prop.]
diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx
index edec205c8..a2fcf7452 100644
--- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx
+++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx
@@ -8,7 +8,7 @@ import { OnChangeValue, ActionMeta } from 'react-select';
 import { OptionType, schema } from '../../../../types';
 import { getNodeLabelsAndRelTypes } from '../../../../services/GetNodeLabelsRelTypes';
 import { tokens } from '@neo4j-ndl/base';
-import { showNormalToast } from '../../../../utils/toasts';
+import { showNormalToast } from '../../../../utils/Toasts';
 import { useHasSelections } from '../../../../hooks/useHasSelections';
 import { Hierarchy1Icon } from '@neo4j-ndl/react/icons';
 import GraphViewModal from '../../../Graph/GraphViewModal';
@@ -257,7 +257,7 @@ export default function EntityExtractionSetting({
          - + Refine Your Knowledge Graph: Merge Duplicate Entities: - + Identify and merge similar entries like "Apple" and "Apple Inc." to eliminate redundancy and improve the accuracy and clarity of your knowledge graph. - {nodesCount > 0 && ( - - Total Duplicate Nodes: {nodesCount} - - )} + {nodesCount > 0 && Total Duplicate Nodes: {nodesCount}} setShowDeletePopUp(true)} - size='large' loading={loading} text={ isLoading diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx index edec205c8..a2fcf7452 100644 --- a/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx +++ b/frontend/src/components/Popups/GraphEnhancementDialog/EnitityExtraction/EntityExtractionSetting.tsx @@ -8,7 +8,7 @@ import { OnChangeValue, ActionMeta } from 'react-select'; import { OptionType, schema } from '../../../../types'; import { getNodeLabelsAndRelTypes } from '../../../../services/GetNodeLabelsRelTypes'; import { tokens } from '@neo4j-ndl/base'; -import { showNormalToast } from '../../../../utils/toasts'; +import { showNormalToast } from '../../../../utils/Toasts'; import { useHasSelections } from '../../../../hooks/useHasSelections'; import { Hierarchy1Icon } from '@neo4j-ndl/react/icons'; import GraphViewModal from '../../../Graph/GraphViewModal'; @@ -257,7 +257,7 @@ export default function EntityExtractionSetting({